{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据预处理-基于Python"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 一、数据框的查找"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "    state  year  pop\n0    Ohio  2000  1.5\n1    Ohio  2001  1.7\n2    Ohio  2002  3.6\n3  Nevada  2001  2.4\n4  Nevada  2002  2.9",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>state</th>\n      <th>year</th>\n      <th>pop</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Ohio</td>\n      <td>2000</td>\n      <td>1.5</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Ohio</td>\n      <td>2001</td>\n      <td>1.7</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Ohio</td>\n      <td>2002</td>\n      <td>3.6</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Nevada</td>\n      <td>2001</td>\n      <td>2.4</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Nevada</td>\n      <td>2002</td>\n      <td>2.9</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],\n",
    "        'year':[2000, 2001, 2002, 2001, 2002, 2003],\n",
    "        'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}\n",
    "df = pd.DataFrame(data) # 生成数据框\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "state     object\n",
      "year       int64\n",
      "pop      float64\n",
      "dtype: object\n",
      "state    category\n",
      "year        int64\n",
      "pop       float64\n",
      "dtype: object\n"
     ]
    },
    {
     "data": {
      "text/plain": "    state  year  pop\n0    Ohio  2000  1.5\n1    Ohio  2001  1.7\n2    Ohio  2002  3.6\n3  Nevada  2001  2.4\n4  Nevada  2002  2.9\n5  Nevada  2003  3.2",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>state</th>\n      <th>year</th>\n      <th>pop</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Ohio</td>\n      <td>2000</td>\n      <td>1.5</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Ohio</td>\n      <td>2001</td>\n      <td>1.7</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Ohio</td>\n      <td>2002</td>\n      <td>3.6</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Nevada</td>\n      <td>2001</td>\n      <td>2.4</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Nevada</td>\n      <td>2002</td>\n      <td>2.9</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Nevada</td>\n      <td>2003</td>\n      <td>3.2</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(df.dtypes) #查看每列变量的数据类型\n",
    "df['state']=df['state'].astype(\"category\") # 转换数据类型为类别类型或名义变量或因子类型。\n",
    "df['year'] = pd.to_datetime(df['year'].apply(str),format=\"%Y-%m-%d\").dt.year #首先转换为字符串，然后只让显示年份。\n",
    "print(df.dtypes)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "Index(['state', 'year', 'pop'], dtype='object')"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns # 查看列变量名"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "(6, 3)"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape # 数据的行和列数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "['Ohio', 'Nevada']\nCategories (2, object): ['Ohio', 'Nevada']"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['state'].unique() # 查看某一列的取值种类, 并且可以找出错乱的字符"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "Nevada    3\nOhio      3\nName: state, dtype: int64"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['state'].value_counts() #查看某列每个取值出现的次数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "    state  year  pop  GDP\n0    Ohio  2000  1.5    1\n1    Ohio  2001  1.7    2\n2    Ohio  2002  3.6    3\n3  Nevada  2001  2.4    4\n4  Nevada  2002  2.9    5",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>state</th>\n      <th>year</th>\n      <th>pop</th>\n      <th>GDP</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Ohio</td>\n      <td>2000</td>\n      <td>1.5</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Ohio</td>\n      <td>2001</td>\n      <td>1.7</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Ohio</td>\n      <td>2002</td>\n      <td>3.6</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Nevada</td>\n      <td>2001</td>\n      <td>2.4</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Nevada</td>\n      <td>2002</td>\n      <td>2.9</td>\n      <td>5</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "GDP=np.arange(df.shape[0])+1 # 添加新的变量\n",
    "df.insert(3,'GDP',GDP)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "    state  year  pop  GDP\na    Ohio  2000  1.5    1\nb    Ohio  2001  1.7    2\nc    Ohio  2002  3.6    3\nd  Nevada  2001  2.4    4\ne  Nevada  2002  2.9    5",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>state</th>\n      <th>year</th>\n      <th>pop</th>\n      <th>GDP</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>Ohio</td>\n      <td>2000</td>\n      <td>1.5</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>Ohio</td>\n      <td>2001</td>\n      <td>1.7</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>Ohio</td>\n      <td>2002</td>\n      <td>3.6</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>d</th>\n      <td>Nevada</td>\n      <td>2001</td>\n      <td>2.4</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>e</th>\n      <td>Nevada</td>\n      <td>2002</td>\n      <td>2.9</td>\n      <td>5</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.index=['a','b','c','d','e','f'] # 定义新的索引\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "state    Ohio\n",
      "year     2000\n",
      "pop       1.5\n",
      "GDP         1\n",
      "Name: a, dtype: object\n",
      "a    Ohio\n",
      "b    Ohio\n",
      "Name: state, dtype: category\n",
      "Categories (2, object): ['Nevada', 'Ohio']\n"
     ]
    },
    {
     "data": {
      "text/plain": "   year  GDP\na  2000    1\nb  2001    2",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>year</th>\n      <th>GDP</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>2000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>2001</td>\n      <td>2</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(df.loc['a']) # location是位置的意思，即通过索引来查看行\n",
    "print(df.iloc[0:2, 0]) # \"i\"是integer的缩写，写出行数来查看。其实等同于直接df[0:2]\n",
    "df.loc[['a','b'],['year','GDP']] # 查看指定的行和列\n",
    "#df.iloc[[0,2],[1,3]] # 指定数量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a     True\n",
      "b     True\n",
      "c     True\n",
      "d    False\n",
      "e    False\n",
      "f    False\n",
      "Name: state, dtype: bool\n"
     ]
    },
    {
     "data": {
      "text/plain": "  state  year  pop  GDP\na  Ohio  2000  1.5    1\nb  Ohio  2001  1.7    2\nc  Ohio  2002  3.6    3",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>state</th>\n      <th>year</th>\n      <th>pop</th>\n      <th>GDP</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>Ohio</td>\n      <td>2000</td>\n      <td>1.5</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>Ohio</td>\n      <td>2001</td>\n      <td>1.7</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>Ohio</td>\n      <td>2002</td>\n      <td>3.6</td>\n      <td>3</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(df['state']==\"Ohio\")\n",
    "df[df['state']==\"Ohio\"] # 查看满足条件的所有行"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 二、数据框修改"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "    state  year\na    Ohio  2000\nb    Ohio  2001\nc    Ohio  2002\nd  Nevada  2001\ne  Nevada  2002",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>state</th>\n      <th>year</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>Ohio</td>\n      <td>2000</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>Ohio</td>\n      <td>2001</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>Ohio</td>\n      <td>2002</td>\n    </tr>\n    <tr>\n      <th>d</th>\n      <td>Nevada</td>\n      <td>2001</td>\n    </tr>\n    <tr>\n      <th>e</th>\n      <td>Nevada</td>\n      <td>2002</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_df=df.drop(['pop','GDP'],axis=1) # 丢弃列数据\n",
    "new_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": "    state  year  pop  GDP\na    Ohio  2000  1.5    1\nb    Ohio  2001  1.7   10\nc    Ohio  2002  3.6    3\nd  Nevada  2001  2.4    4\ne  Nevada  2002  2.9    5",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>state</th>\n      <th>year</th>\n      <th>pop</th>\n      <th>GDP</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>Ohio</td>\n      <td>2000</td>\n      <td>1.5</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>Ohio</td>\n      <td>2001</td>\n      <td>1.7</td>\n      <td>10</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>Ohio</td>\n      <td>2002</td>\n      <td>3.6</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>d</th>\n      <td>Nevada</td>\n      <td>2001</td>\n      <td>2.4</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>e</th>\n      <td>Nevada</td>\n      <td>2002</td>\n      <td>2.9</td>\n      <td>5</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[\"b\",\"GDP\"]=10 # 赋值\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   one  two  three\n",
      "a    0    1      2\n",
      "b    3    4      5\n",
      "c    6    7      8\n"
     ]
    }
   ],
   "source": [
    "f = lambda x : x.max() - x.min()   \n",
    "a = np.arange(9).reshape(3,3)    \n",
    "data = pd.DataFrame(a,index=[\"a\",\"b\",\"c\"],columns=[\"one\",\"two\",\"three\"])   \n",
    "print(data)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one      6\n",
      "two      6\n",
      "three    6\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(data.apply(f)) # 选择列中最大最小值之差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    2\n",
      "b    2\n",
      "c    2\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(data.apply(f,axis=1)) # 选择行中最大最小值之差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one       9\n",
      "two      12\n",
      "three    15\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(data.sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one      3.0\n",
      "two      4.0\n",
      "three    5.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(data.mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 三、缺失值处理\n",
    "在对缺失数据进行处理前，了解数据缺失的机制和形式是十分必要的。将数据集中不含缺失值的变量称为完全变量，数据集中含有缺失值的变量称为不完全变量。而从缺失的分布来将缺失可以分为完全随机缺失，随机缺失和完全非随机缺失。\n",
    "* 完全随机缺失（missing completely at random,MCAR）：指的是数据的缺失是完全随机的，不依赖于任何不完全变量或完全变量，不影响样本的无偏性，如家庭地址缺失；\n",
    "* 随机缺失(missing at random,MAR)：指的是数据的缺失不是完全随机的，即该类数据的缺失依赖于其他完全变量，如财务数据缺失情况与企业的大小有关；\n",
    "* 非随机缺失(missing not at random,MNAR)：指的是数据的缺失与不完全变量自身的取值有关，如高收入人群不原意提供家庭收入；\n",
    "\n",
    "对于随机缺失和非随机缺失，直接删除记录是不合适的。随机缺失可以通过已知变量对缺失值进行估计，而非随机缺失的非随机性还没有很好的解决办法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "   one  two  three  four\na    0    1      2  10.0\nb    3    4      5   NaN\nc    6    7      8  20.0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>one</th>\n      <th>two</th>\n      <th>three</th>\n      <th>four</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>10.0</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>3</td>\n      <td>4</td>\n      <td>5</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>6</td>\n      <td>7</td>\n      <td>8</td>\n      <td>20.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[\"four\"]=[10,np.nan,20]\n",
    "data.head()\n",
    "#data.isnull() # 查看缺失值位置"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   one  two  three  four\n",
      "a    0    1      2  10.0\n",
      "b    3    4      5   NaN\n",
      "c    6    7      8  20.0\n",
      "   one  two  three  four\n",
      "a    0    1      2  10.0\n",
      "b    3    4      5   NaN\n",
      "c    6    7      8  20.0\n"
     ]
    }
   ],
   "source": [
    "data.dropna(how='any') # 去掉存在缺失值所有行\n",
    "print(data)\n",
    "data.dropna(how=\"all\")  # 去掉都是缺失值所在的行\n",
    "print(data)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n"
     ]
    },
    {
     "data": {
      "text/plain": "   one  two  three  four\na    0    1      2  10.0\nb    3    4      5   NaN\nc    6    7      8  20.0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>one</th>\n      <th>two</th>\n      <th>three</th>\n      <th>four</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>10.0</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>3</td>\n      <td>4</td>\n      <td>5</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>6</td>\n      <td>7</td>\n      <td>8</td>\n      <td>20.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#统计重复记录数\n",
    "print(data.duplicated().sum())\n",
    "data.drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   one  two  three   four\n",
      "a    0    1      2   10.0\n",
      "b    3    4      5  100.0\n",
      "c    6    7      8   20.0\n"
     ]
    },
    {
     "data": {
      "text/plain": "   one  two  three  four\na    0    1      2  10.0\nb    3    4      5  15.0\nc    6    7      8  20.0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>one</th>\n      <th>two</th>\n      <th>three</th>\n      <th>four</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>10.0</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>3</td>\n      <td>4</td>\n      <td>5</td>\n      <td>15.0</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>6</td>\n      <td>7</td>\n      <td>8</td>\n      <td>20.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(data.fillna(100))\n",
    "data.fillna(data.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   one  two  three  four\n",
      "a    0    1      2  10.0\n",
      "b    3    4      5  15.0\n",
      "c    6    7      8  20.0\n"
     ]
    }
   ],
   "source": [
    "print(data.fillna({'one':1,'two':2,'four':data['four'].mean()})) # 定义字典以不同形式填充每列缺失值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 四、异常值识别与处理\n",
    "模型通常是对整体样本数据结构的一种表达方式，这种表达方式通常抓住的是整体样本一般性的性质，而那些在这些性质上表现完全与整体样本不一致的点，我们就称其为异常点。一般异常值的检测方法有基于统计的方法，基于聚类的方法，以及一些专门检测异常值的方法等。\n",
    "\n",
    "常用的异常值处理方式包括：\n",
    "* 删除含有异常值的记录：直接将含有异常值的记录删除；\n",
    "* 视为缺失值：将异常值视为缺失值，利用缺失值处理的方法进行处理；\n",
    "* 平均值修正：可用前后两个观测值的平均值修正该异常值；\n",
    "* 不处理：直接在具有异常值的数据集上进行数据挖掘；"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 20 entries, 0 to 19\n",
      "Data columns (total 5 columns):\n",
      " #   Column  Non-Null Count  Dtype\n",
      "---  ------  --------------  -----\n",
      " 0   num1    20 non-null     int32\n",
      " 1   num2    20 non-null     int32\n",
      " 2   num3    20 non-null     int32\n",
      " 3   num4    20 non-null     int32\n",
      " 4   num5    20 non-null     int32\n",
      "dtypes: int32(5)\n",
      "memory usage: 528.0 bytes\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "a=np.arange(100).reshape(20,5)\n",
    "df1=pd.DataFrame(a, columns=[\"num1\",\"num2\",\"num3\",\"num4\",\"num5\"])\n",
    "df1.info() # or data.describe() 对数据整体进行描述"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "<AxesSubplot:>"
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAATcklEQVR4nO3df4zkd33f8eeLO4dQH+JHTbaOfcqhxG0PLomBrVGU/rEbN8TQqIaWUK8iAmWri1pskYJUDCc1pHQl04SgRiFOz12E09K7WEkoLhhi190RQsIhtjE29kK5wrn4dNjhl/FCirj1u3/s156tubvd25nZGX/2+ZBG853P98e8530zr/3qO9/vXKoKSVJbnjHuAiRJw2e4S1KDDHdJapDhLkkNMtwlqUG7x10AwAUXXFD79u0bdxl897vf5fzzzx93GRPBXvTZiz570TcJvbjrrru+XlUvON28iQj3ffv2ceedd467DHq9HjMzM+MuYyLYiz570Wcv+iahF0kePNM8D8tIUoMMd0lqkOEuSQ0y3CWpQYa7JDVow3BP8qNJPpPkc0nuT/Jb3fgLk/xFkmNJ/jjJj3Tjz+weH+vm7xvxa5A0YkeOHOHAgQNcfvnlHDhwgCNHjoy7JG1gM6dCfh/4hapaSXIe8KkkHwfeCryvqo4m+UNgHri+u/9WVf1UkquA9wD/dET1SxqxI0eOcOjQIRYXF1ldXWXXrl3Mz88DMDc3N+bqdCYb7rnXmpXu4XndrYBfAP6kG78ReHU3fWX3mG7+5UkyrIIlba+FhQUWFxeZnZ1l9+7dzM7Osri4yMLCwrhL01ls6iKmJLuAu4CfAt4P/G/g21V1qlvkIeCibvoi4KsAVXUqyaPA3wS+/pRtHgQOAkxNTdHr9QZ6IcOwsrIyEXVMAnvRt9N7sby8zOrqKr1e78lerK6usry8vKP7Munvi02Fe1WtApcmeS7wYeDvDvrEVXUYOAwwPT1d477SCybjirNJYS/6dnov9u/fz65du5iZmXmyF0tLS+zfv39H92XS3xfndLZMVX0bWAJ+Dnhukif+OFwMnOimTwB7Abr5zwG+MYxiJW2/Q4cOMT8/z9LSEqdOnWJpaYn5+XkOHTo07tJ0FhvuuSd5AfCDqvp2kmcBv8jal6RLwGuBo8AbgI90q9zcPf50N/9/lv+Xn/S09cSXptdccw3Ly8vs37+fhYUFv0ydcJs5LHMhcGN33P0ZwE1V9dEkDwBHk/w74LPAYrf8IvCfkxwDvglcNYK6JW2jubk55ubmJv5QhPo2DPequhd4yWnGvwxcdprx/wv8ylCqkyRtiVeoSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGrRhuCfZm2QpyQNJ7k/ylm78XUlOJLmnu71q3TrvSHIsyReT/NIoX4Ak6Yft3sQyp4C3VdXdSZ4N3JXktm7e+6rqd9YvnORFwFXAi4EfB/5Hkr9dVavDLFySdGYb7rlX1cmqurubfgxYBi46yypXAker6vtV9RXgGHDZMIqVJG1OqmrzCyf7gE8CB4C3Am8EvgPcydre/beS/D5wR1X9l26dReDjVfUnT9nWQeAgwNTU1MuOHj068IsZ1MrKCnv27Bl3GRPBXvTZiz570TcJvZidnb2rqqZPN28zh2UASLIH+FPgN6rqO0muB94NVHf/XuBNm91eVR0GDgNMT0/XzMzMZlcdmV6vxyTUMQnsRZ+96LMXfZPei02dLZPkPNaC/UNV9WcAVfVwVa1W1ePADfQPvZwA9q5b/eJuTJK0TTZztkyARWC5qn533fiF6xZ7DfD5bvpm4Kokz0zyQuAS4DPDK1mStJHNHJb5eeD1wH1J7unG3gnMJbmUtcMyx4Ff
B6iq+5PcBDzA2pk2b/ZMGUnaXhuGe1V9CshpZt1ylnUWgIUB6pIkDcArVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ3aMNyT7E2ylOSBJPcneUs3/vwktyX5Unf/vG48SX4vybEk9yZ56ahfhKTROnLkCAcOHODyyy/nwIEDHDlyZNwlaQO7N7HMKeBtVXV3kmcDdyW5DXgjcHtVXZfkWuBa4O3AK4FLutvLgeu7e0lPQ0eOHOHQoUMsLi6yurrKrl27mJ+fB2Bubm7M1elMNtxzr6qTVXV3N/0YsAxcBFwJ3NgtdiPw6m76SuCPas0dwHOTXDjswiVtj4WFBRYXF5mdnWX37t3Mzs6yuLjIwsLCuEvTWWxmz/1JSfYBLwH+ApiqqpPdrK8BU930RcBX1632UDd2ct0YSQ4CBwGmpqbo9XrnWPrwraysTEQdk8Be9O30XiwvL7O6ukqv13uyF6urqywvL+/ovkz6+2LT4Z5kD/CnwG9U1XeSPDmvqipJncsTV9Vh4DDA9PR0zczMnMvqI9Hr9ZiEOiaBvejb6b3Yv38/u3btYmZm5sleLC0tsX///h3dl0l/X2zqbJkk57EW7B+qqj/rhh9+4nBLd/9IN34C2Ltu9Yu7MUlPQ4cOHWJ+fp6lpSVOnTrF0tIS8/PzHDp0aNyl6Sw23HPP2i76IrBcVb+7btbNwBuA67r7j6wbvzrJUda+SH103eEbSU8zT3xpes0117C8vMz+/ftZWFjwy9QJt5nDMj8PvB64L8k93dg7WQv1m5LMAw8Cr+vm3QK8CjgGfA/4Z8MsWNL2m5ubY25ubuIPRahvw3Cvqk8BOcPsy0+zfAFvHrAuSdIAvEJVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoA3DPckHkjyS5PPrxt6V5ESSe7rbq9bNe0eSY0m+mOSXRlW4JOnMNrPn/kHgitOMv6+qLu1utwAkeRFwFfDibp0/SLJrWMVKkjZnw3Cvqk8C39zk9q4EjlbV96vqK8Ax4LIB6pMkbcHuAda9OsmvAXcCb6uqbwEXAXesW+ahbuyHJDkIHASYmpqi1+sNUMpwrKysTEQdk8Be9NmLPnvRN+m92Gq4Xw+8G6ju/r3Am85lA1V1GDgMMD09XTMzM1ssZXh6vR6TUMcksBd99qLPXvRNei+2dLZMVT1cVatV9ThwA/1DLyeAvesWvbgbkyRtoy2Fe5IL1z18DfDEmTQ3A1cleWaSFwKXAJ8ZrERJ0rna8LBMkiPADHBBkoeA3wRmklzK2mGZ48CvA1TV/UluAh4ATgFvrqrVkVQuSTqjDcO9quZOM7x4luUXgIVBipIkDcYrVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ3aMNyTfCDJI0k+v27s+UluS/Kl7v553XiS/F6SY0nuTfLSURYvSTq9zey5fxC44ilj1wK3V9UlwO3dY4BXApd0t4PA9cMpU5J0LjYM96r6JPDNpwxfCdzYTd8IvHrd+B/VmjuA5ya5cEi1SpI2afcW15uqqpPd9NeAqW76IuCr65Z7qBs7yVMkOcja3j1TU1P0er0tljI8KysrE1HHJLAXfTulF7Ozs0PZztLS0lC2M+km/X2x1XB/UlVVktrCeoeBwwDT09M1MzMzaCkD6/V6TEIdk8Be
9O2UXlRt/DHed+3HOH7dP9yGasYryVC2s5mejspWw/3hJBdW1cnusMsj3fgJYO+65S7uxqSJ1MKHWMPXwh+6rYb7zcAbgOu6+4+sG786yVHg5cCj6w7fjJUf4j570dfCh1g6nc2cCnkE+DTwd5I8lGSetVD/xSRfAv5B9xjgFuDLwDHgBuBfjqTqLaiqDW8/8faPbrhMC+yF1L4N99yrau4Msy4/zbIFvHnQoiRJg/EKVUlq0MBny0h6evjZ37qVR//6BwNvZ9+1H9vyus951nl87jdfMXAN2pjhLu0Qj/71Dwb+YnjQ00IH+cOgc+NhGUlqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGuR57pJ2nJ1wQZfhLmnH2QkXdBnuDdoJeyWbZS+0UzUT7n6I+3bCXslm2QvtVM2Eux9iSerzbBlJapDhLkkNauawjKSze/b+a/npG68dfEM3DlIDgP8f7XYw3KUd4rHl6/xeagfxsIwkNchwl6QGGe6S1CDDXZIaZLhLUoMGOlsmyXHgMWAVOFVV00meD/wxsA84Dryuqr41WJmSpHMxjD332aq6tKqmu8fXArdX1SXA7d1jSdI2GsV57lcCM930jUAPePsInkeStmQnXNA1aLgXcGuSAv5jVR0GpqrqZDf/a8DU6VZMchA4CDA1NUWv1xuwFAbexsrKysDbGMbrGAZ7sWZSPsS93vmD1zAEvi/WPLZ8HR+8YrB/k5WVFfbs2bPl9d/4ie+OtBeDhvvfr6oTSX4MuC3JF9bPrKrqgv+HdH8IDgNMT0/XIFe9AfCJjw105RwMfvXdMGoYhmc/+NNc8+AQNvSNAWrYDzMz9w2hiME8du1kXJU584atrz80fkb6dkAvBgr3qjrR3T+S5MPAZcDDSS6sqpNJLgQeGUKdG5qUPbRJ+N0MLzOXtOVwT3I+8IyqeqybfgXwb4GbgTcA13X3HxlGoRsx0CSpb5A99yngw0me2M5/rapPJPlL4KYk88CDwOsGL1OSdC62HO5V9WXgZ08z/g3g8kGKkiQNxitUJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQaP4PXdJE2oov3/0ia1v4znPOm/w5x+S1nthuKt5rX+IN2vQH9aDtV4OYzvjthN60VS4+yHusxdrdsKHWDqdZsLdD3GfvZDkF6qS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDRvbbMkmuAP4DsAv4T1V13aieS5KGKcnmlnvP2edX1RCq2ZqR7Lkn2QW8H3gl8CJgLsmLRvFckoYjyYa3B9/zyxsu04Kq2vC2tLS04TLjNKo998uAY1X1ZYAkR4ErgQdG9Hw6By3slQyLvejbzGvo9XrMzMyMvhgNLKN4UyZ5LXBFVf3z7vHrgZdX1dXrljkIHASYmpp62dGjR4dex3qzs7ND2c7S0tJQtjPpVlZW2LNnz7jLmAj2os9e9E1CL2ZnZ++qqunTzRvb77lX1WHgMMD09HSNem/AvZJzYy/67EWfveib9F6M6myZE8DedY8v7sYkSdtgVOH+l8AlSV6Y5EeAq4CbR/RckqSnGMlhmao6leRq4M9ZOxXyA1V1/yieS5L0w0Z2zL2qbgFuGdX2JUln5hWqktQgw12SGmS4S1KDDHdJatBIrlA95yKSvwIeHHcdwAXA18ddxISwF332os9e9E1CL36iql5wuhkTEe6TIsmdZ7qUd6exF332os9e9E16LzwsI0kNMtwlqUGG+//v8LgLmCD2os9e9NmLvonuhcfcJalB7rlLUoMMd0lqkOF+DpL8SpL7kzyeZGJPgdoOSX47yReS3Jvkw0meO+6axiXJu7s+3JPk1iQ/Pu6axi3J25JUkgvGXcu4JHlXkhPd++KeJK/azuc33M/N54F/DHxy3IVMgNuAA1X1M8D/At4x5nrG6ber6meq6lLgo8C/GXM9Y5Vk
L/AK4P+Mu5YJ8L6qurS7beuv5DYZ7kn2JVlOckO3p31rkmcl6T2xx53kgiTHu+k3JvlvSW5LcjzJ1UnemuSzSe5I8nyAqlquqi+O8aWdsxH24taqOtU9zR2s/W9bE22EvfjOuqc5H5j4sxRG1YvO+4B/zdOgDzDyXoxNk+HeuQR4f1W9GPg28E82WP4Aa3vlfw9YAL5XVS8BPg382gjr3A6j7sWbgI8PrdrRGkkvkiwk+Srwqzx99tyH3oskVwInqupzoyp6REb1Gbm6O2T3gSTPG37ZZ9ZyuH+lqu7ppu8C9m2w/FJVPVZVfwU8Cvz3bvy+Taw76UbWiySHgFPAh4ZV7IiNpBdVdaiq9rLWh6uHWfAIDbUXSf4G8E6ePn/c1hvF++J64CeBS4GTwHuHV+7GWg7376+bXmXtf506Rf81/+hZln983ePHGeH/WLVNRtKLJG8Efhn41Xr6XDAx6vfFh9h4r29SDLsXPwm8EPhcdwjjYuDuJH9ruGWPxNDfF1X1cFWtVtXjwA3AZcMu+mxaDvfTOQ68rJt+7RjrmATHGaAXSa5g7bjqP6qq7w2xrnE4zmC9uGTdwyuBLwyhpnE5zhZ7UVX3VdWPVdW+qtoHPAS8tKq+NtwSt81xBntfXLju4WtYOyFj2+y0cP8d4F8k+SxrP9d5TpK8JslDwM8BH0vy58MucBsN1Avg94FnA7d1p3n94VCr216D9uK6JJ9Pci9rZ4m8ZajVba9Be9GSQXvx75Pc170vZoF/NdTqNuDPD0hSg3banrsk7QiGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQ/wPLYNCAau+sagAAAABJRU5ErkJggg==\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df1.loc[7:9,\"num4\"]=[100,200,300] #定义异常值\n",
    "#from pandas.tools.plotting import scatter_matrix\n",
    "#pd.plotting.scatter_matrix(df1,marker='o', alpha=0.2, figsize=(6, 6), diagonal='kde')# 画出散点图粗略看是否存在异常值\n",
    "df1.boxplot() #箱体图常用来检测异常值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 五、基于python的数据预处理\n",
    "现实世界中数据大体上都是不完整，不一致的脏数据，无法直接进行数据分析，或挖掘结果差强人意。为了提高数据挖掘的质量产生了数据预处理技术。\n",
    "数据预处理有多种方法：数据清理，数据集成，数据变换，数据归约等。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.1 标准化（Standardization）\n",
    "特征$X$标准化的公式为：\n",
    "$$\n",
    "X_i=\\frac{X_i-mean(X)}{std(X)},\\quad X=(X_1,X_2,...,X_d)\n",
    "$$\n",
    "一般会把train和test集放在一起做标准化，或者在train集上做标准化后，用同样的标准化器去标准化test集，此时可以用scaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 20 entries, 0 to 19\n",
      "Data columns (total 5 columns):\n",
      " #   Column  Non-Null Count  Dtype  \n",
      "---  ------  --------------  -----  \n",
      " 0   num1    20 non-null     float64\n",
      " 1   num2    20 non-null     float64\n",
      " 2   num3    20 non-null     float64\n",
      " 3   num4    20 non-null     float64\n",
      " 4   num5    20 non-null     float64\n",
      "dtypes: float64(5)\n",
      "memory usage: 928.0 bytes\n"
     ]
    },
    {
     "data": {
      "text/plain": "       num1      num2      num3      num4      num5\n0 -1.647509 -1.647509 -1.647509 -1.045503 -1.647509\n1 -1.474087 -1.474087 -1.474087 -0.971928 -1.474087\n2 -1.300665 -1.300665 -1.300665 -0.898353 -1.300665\n3 -1.127243 -1.127243 -1.127243 -0.824778 -1.127243\n4 -0.953821 -0.953821 -0.953821 -0.751203 -0.953821",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>num1</th>\n      <th>num2</th>\n      <th>num3</th>\n      <th>num4</th>\n      <th>num5</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-1.647509</td>\n      <td>-1.647509</td>\n      <td>-1.647509</td>\n      <td>-1.045503</td>\n      <td>-1.647509</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>-1.474087</td>\n      <td>-1.474087</td>\n      <td>-1.474087</td>\n      <td>-0.971928</td>\n      <td>-1.474087</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>-1.300665</td>\n      <td>-1.300665</td>\n      <td>-1.300665</td>\n      <td>-0.898353</td>\n      <td>-1.300665</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>-1.127243</td>\n      <td>-1.127243</td>\n      <td>-1.127243</td>\n      <td>-0.824778</td>\n      <td>-1.127243</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>-0.953821</td>\n      <td>-0.953821</td>\n      <td>-0.953821</td>\n      <td>-0.751203</td>\n      <td>-0.953821</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sklearn\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "scaler = StandardScaler().fit(df1)\n",
    "df1_1=scaler.transform(df1)\n",
    "# sdf1_2=scaler.transform(testdata)# 操作中df1用测试集代替\n",
    "df1_1=pd.DataFrame(df1_1,columns=df1.columns) # 转换为数据框类型\n",
    "df1_1.info()\n",
    "df1_1.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.2  最小-最大规范化（MinMaxScaler）\n",
    "最小-最大规范化对原始数据进行线性变换，变换到[0,1]区间（也可以是其他固定最小最大值的区间）\n",
    "$$\n",
    "X_i=\\frac{X_i-X_{min}}{X_{max}-X_{min}}\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.         0.         0.         0.         0.        ]\n",
      " [0.05263158 0.05263158 0.05263158 0.01683502 0.05263158]\n",
      " [0.10526316 0.10526316 0.10526316 0.03367003 0.10526316]\n",
      " [0.15789474 0.15789474 0.15789474 0.05050505 0.15789474]\n",
      " [0.21052632 0.21052632 0.21052632 0.06734007 0.21052632]\n",
      " [0.26315789 0.26315789 0.26315789 0.08417508 0.26315789]\n",
      " [0.31578947 0.31578947 0.31578947 0.1010101  0.31578947]\n",
      " [0.36842105 0.36842105 0.36842105 0.32659933 0.36842105]\n",
      " [0.42105263 0.42105263 0.42105263 0.66329966 0.42105263]\n",
      " [0.47368421 0.47368421 0.47368421 1.         0.47368421]\n",
      " [0.52631579 0.52631579 0.52631579 0.16835017 0.52631579]\n",
      " [0.57894737 0.57894737 0.57894737 0.18518519 0.57894737]\n",
      " [0.63157895 0.63157895 0.63157895 0.2020202  0.63157895]\n",
      " [0.68421053 0.68421053 0.68421053 0.21885522 0.68421053]\n",
      " [0.73684211 0.73684211 0.73684211 0.23569024 0.73684211]\n",
      " [0.78947368 0.78947368 0.78947368 0.25252525 0.78947368]\n",
      " [0.84210526 0.84210526 0.84210526 0.26936027 0.84210526]\n",
      " [0.89473684 0.89473684 0.89473684 0.28619529 0.89473684]\n",
      " [0.94736842 0.94736842 0.94736842 0.3030303  0.94736842]\n",
      " [1.         1.         1.         0.31986532 1.        ]]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "scaler = MinMaxScaler()\n",
    "scaler.fit(df1) #scaler.fit(X_train)\n",
    "traindata=scaler.transform(df1)#X_train_pp = scaler.transform(X_train),切记这时数据变为数组类型了\n",
    "#X_test_pp = scaler.transform(X_test)\n",
    "print(traindata)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.3 规范化(Normalization)\n",
    "规范化是将不同变化范围的值映射到相同的固定范围，常见的是[0,1]，此时也称为归一化。\n",
    "$$\n",
    "X_i=\\frac{X_i}{\\|X\\|_2}\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.         0.00396308 0.0078069  0.00667437 0.01515434]\n",
      " [0.02012109 0.02377847 0.02732416 0.01779831 0.03409727]\n",
      " [0.04024218 0.04359385 0.04684142 0.02892226 0.0530402 ]\n",
      " [0.06036327 0.06340924 0.06635868 0.0400462  0.07198312]\n",
      " [0.08048436 0.08322463 0.08587594 0.05117015 0.09092605]\n",
      " [0.10060545 0.10304002 0.10539319 0.06229409 0.10986898]\n",
      " [0.12072655 0.12285541 0.12491045 0.07341804 0.12881191]\n",
      " [0.14084764 0.14267079 0.14442771 0.22247891 0.14775484]\n",
      " [0.16096873 0.16248618 0.16394497 0.44495782 0.16669776]\n",
      " [0.18108982 0.18230157 0.18346222 0.66743672 0.18564069]\n",
      " [0.20121091 0.20211696 0.20297948 0.11791382 0.20458362]\n",
      " [0.221332   0.22193235 0.22249674 0.12903777 0.22352655]\n",
      " [0.24145309 0.24174773 0.242014   0.14016171 0.24246947]\n",
      " [0.26157418 0.26156312 0.26153126 0.15128566 0.2614124 ]\n",
      " [0.28169527 0.28137851 0.28104851 0.1624096  0.28035533]\n",
      " [0.30181636 0.3011939  0.30056577 0.17353355 0.29929826]\n",
      " [0.32193745 0.32100928 0.32008303 0.18465749 0.31824118]\n",
      " [0.34205855 0.34082467 0.33960029 0.19578144 0.33718411]\n",
      " [0.36217964 0.36064006 0.35911755 0.20690538 0.35612704]\n",
      " [0.38230073 0.38045545 0.3786348  0.21802933 0.37506997]]\n"
     ]
    }
   ],
   "source": [
    "from sklearn import preprocessing\n",
    "#normalizer = preprocessing.Normalizer(norm=\"l2\")  # 函数实例化\n",
    "#df_exam= normalizer.fit(df1) #建立变换标准\n",
    "#df_train_norm2= df_exam.transform(df1) #对df1进行规范化变换\n",
    "#df_test_norm2=df_exam.transform(testdata),对测试数据进行规范变换\n",
    "#np.sum(df_train_norm2[:,1]**2)\n",
    "#help(preprocessing.Normalizer)\n",
    "X_train_norm = preprocessing.normalize(df1, norm='l2', axis=0)  # 直接用标准化函数(这里axis=0指定的是列)\n",
    "#np.sum(X_train_norm[:,2]**2)\n",
    "#测试集如何规范化呢？\n",
    "print(X_train_norm)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.3 类别特征编码\n",
    "有时候特征是类别型的，而一些算法的输入必须是数值型，此时需要对其编码。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "   year  pop  GDP  state_Nevada  state_Ohio  state_nan\na  2000  1.5    1             0           1          0\nb  2001  1.7   10             0           1          0\nc  2002  3.6    3             0           1          0\nd  2001  2.4    4             1           0          0\ne  2002  2.9    5             1           0          0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>year</th>\n      <th>pop</th>\n      <th>GDP</th>\n      <th>state_Nevada</th>\n      <th>state_Ohio</th>\n      <th>state_nan</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>2000</td>\n      <td>1.5</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>2001</td>\n      <td>1.7</td>\n      <td>10</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>2002</td>\n      <td>3.6</td>\n      <td>3</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>d</th>\n      <td>2001</td>\n      <td>2.4</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>e</th>\n      <td>2002</td>\n      <td>2.9</td>\n      <td>5</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "newdf=pd.get_dummies(df,columns=[\"state\"],dummy_na=True)\n",
    "newdf.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.4 连续变量离散化\n",
    "数据分析和统计的预处理阶段，经常的会碰到年龄、消费等连续型数值，我们希望将数值进行离散化分段统计，提高数据区分度。\n",
    "在机器学习中，在决策树算法常用这种方式，这里介绍pd.cut()函数的用法。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[30 71 95 61 72 73 74 81 45 30 77 34 25 45 91 97 91 82 97 58]\n"
     ]
    }
   ],
   "source": [
    "score_list = np.random.randint(25, 100, size=20) # 随机生成整数序列\n",
    "print(score_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(0, 59], (70, 80], (80, 100], (59, 70], (70, 80], ..., (80, 100], (80, 100], (80, 100], (80, 100], (0, 59]]\n",
      "Length: 20\n",
      "Categories (4, interval[int64]): [(0, 59] < (59, 70] < (70, 80] < (80, 100]]\n"
     ]
    }
   ],
   "source": [
    "bins = [0, 59, 70, 80, 100] #　指定多个区间\n",
    "score_cut = pd.cut(score_list, bins)\n",
    "# print(type(score_cut)) #此时数据为类别型\n",
    "print(score_cut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0, 59]      7\n",
      "(80, 100]    7\n",
      "(70, 80]     5\n",
      "(59, 70]     1\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(pd.value_counts(score_cut))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "    score Categories\n0      30        low\n1      71       good\n2      95    perfect\n3      61     middle\n4      72       good\n5      73       good\n6      74       good\n7      81    perfect\n8      45        low\n9      30        low\n10     77       good\n11     34        low\n12     25        low\n13     45        low\n14     91    perfect\n15     97    perfect\n16     91    perfect\n17     82    perfect\n18     97    perfect\n19     58        low",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>score</th>\n      <th>Categories</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>30</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>71</td>\n      <td>good</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>95</td>\n      <td>perfect</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>61</td>\n      <td>middle</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>72</td>\n      <td>good</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>73</td>\n      <td>good</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>74</td>\n      <td>good</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>81</td>\n      <td>perfect</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>45</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>30</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>77</td>\n      <td>good</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>34</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>25</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>45</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>91</td>\n      <td>perfect</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>97</td>\n      <td>perfect</td>\n    </tr>\n    <tr>\n      <th>16</th>\n      <td>91</td>\n      <td>perfect</td>\n    </tr>\n    <tr>\n      <th>17</th>\n      <td>82</td>\n      <td>perfect</td>\n    </tr>\n    <tr>\n      <th>18</th>\n      <td>97</td>\n      <td>perfect</td>\n    </tr>\n    <tr>\n  
    <th>19</th>\n      <td>58</td>\n      <td>low</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_dic={\"score\":score_list}\n",
    "df=pd.DataFrame(df_dic)# 定义数据框类型\n",
    "df['Categories'] = pd.cut(df_dic['score'], bins, labels=['low', 'middle', 'good', 'perfect']) #命名离散化后的该列数据\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1 3 1 1 2 2 2 0 0 2 0 0 0 3 3 3 2 3 1]\n"
     ]
    }
   ],
   "source": [
    "#等频法\n",
    "scorequan = pd.qcut(score_list,4)  # 按四分位数进行切割，可以试试 pd.qcut(data,10)\n",
    "#pd.value_counts(scorequan)\n",
    "print(scorequan.codes) # 0-3对应分组后的四个区间，用代号来注释数据对应区间，结果为ndarray"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 六、数据清洗\n",
    "数据合并、转换、过滤、排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level  numeber1\n",
      "0     a         1\n",
      "1     b         3\n",
      "2     c         5\n",
      "3     d         7\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "data1 = pd.DataFrame({'level':['a','b','c','d'],\n",
    "                 'numeber1':[1,3,5,7]})\n",
    "\n",
    "data2=pd.DataFrame({'level':['a','b','c','e'],\n",
    "                 'numeber2':[2,3,6,10]})\n",
    "print(data1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level  numeber2\n",
      "0     a         2\n",
      "1     b         3\n",
      "2     c         6\n",
      "3     e        10\n"
     ]
    }
   ],
   "source": [
    "print(data2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level  numeber1  numeber2\n",
      "0     a         1         2\n",
      "1     b         3         3\n",
      "2     c         5         6\n"
     ]
    }
   ],
   "source": [
    "print(pd.merge(data1,data2)) #可以看到data1和data2中用于相同标签的字段显示，而其他字段则被舍弃"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level1  numeber1 level2  numeber2\n",
      "0      a         1      a         2\n",
      "1      b         3      b         3\n",
      "2      c         5      c         6\n"
     ]
    }
   ],
   "source": [
    "data3 = pd.DataFrame({'level1':['a','b','c','d'],\n",
    "                 'numeber1':[1,3,5,7]})\n",
    "data4=pd.DataFrame({'level2':['a','b','c','e'],\n",
    "                 'numeber2':[2,3,6,10]})\n",
    "print(pd.merge(data3,data4,left_on='level1',right_on='level2'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "两个数据框中如果列名不同的情况下，我们可以通过指定letf_on 和right_on两个参数把数据连接在一起"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 重叠数据合并\n",
    "有时候我们会遇到重叠数据需要进行合并处理，此时可以用comebine_first函数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level  numeber1  numeber2\n",
      "0     a       1.0       2.0\n",
      "1     b       3.0       NaN\n",
      "2     c       5.0       6.0\n",
      "3     d       NaN      10.0\n"
     ]
    }
   ],
   "source": [
    "data3 = pd.DataFrame({'level':['a','b','c','d'],\n",
    "                 'numeber1':[1,3,5,np.nan]})\n",
    "data4=pd.DataFrame({'level':['a','b','c','e'],\n",
    "                 'numeber2':[2,np.nan,6,10]})\n",
    "print(data3.combine_first(data4))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到相同标签下的内容优先显示data3的内容"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据替换\n",
    "除了使用我们上面提到的fillna的方法外，还可以用replace方法，而且更简单快捷。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    a   b\n",
      "0  20  20\n",
      "1   3   3\n",
      "2   3   3\n",
      "3   4   5\n"
     ]
    }
   ],
   "source": [
    "data=pd.DataFrame({'a':[1,3,3,4],\n",
    "              'b':[1,3,3,5]})\n",
    "print(data.replace(1,20)) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 七、财经数据模块tushare"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "本接口即将停止更新，请尽快使用Pro版接口：https://waditu.com/document/2\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 547 entries, 183 to 729\n",
      "Data columns (total 7 columns):\n",
      " #   Column  Non-Null Count  Dtype  \n",
      "---  ------  --------------  -----  \n",
      " 0   date    547 non-null    object \n",
      " 1   open    547 non-null    float64\n",
      " 2   close   547 non-null    float64\n",
      " 3   high    547 non-null    float64\n",
      " 4   low     547 non-null    float64\n",
      " 5   volume  547 non-null    float64\n",
      " 6   code    547 non-null    object \n",
      "dtypes: float64(5), object(2)\n",
      "memory usage: 34.2+ KB\n"
     ]
    },
    {
     "data": {
      "text/plain": "           date   open  close   high    low     volume    code\n725  2020-12-25  17.66  17.64  17.85  17.51   613015.0  601688\n726  2020-12-28  17.60  17.39  17.73  17.31   674174.0  601688\n727  2020-12-29  17.48  17.40  17.72  17.39   643286.0  601688\n728  2020-12-30  17.39  17.51  17.59  17.35   772371.0  601688\n729  2020-12-31  17.61  18.01  18.22  17.61  1480287.0  601688",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>date</th>\n      <th>open</th>\n      <th>close</th>\n      <th>high</th>\n      <th>low</th>\n      <th>volume</th>\n      <th>code</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>725</th>\n      <td>2020-12-25</td>\n      <td>17.66</td>\n      <td>17.64</td>\n      <td>17.85</td>\n      <td>17.51</td>\n      <td>613015.0</td>\n      <td>601688</td>\n    </tr>\n    <tr>\n      <th>726</th>\n      <td>2020-12-28</td>\n      <td>17.60</td>\n      <td>17.39</td>\n      <td>17.73</td>\n      <td>17.31</td>\n      <td>674174.0</td>\n      <td>601688</td>\n    </tr>\n    <tr>\n      <th>727</th>\n      <td>2020-12-29</td>\n      <td>17.48</td>\n      <td>17.40</td>\n      <td>17.72</td>\n      <td>17.39</td>\n      <td>643286.0</td>\n      <td>601688</td>\n    </tr>\n    <tr>\n      <th>728</th>\n      <td>2020-12-30</td>\n      <td>17.39</td>\n      <td>17.51</td>\n      <td>17.59</td>\n      <td>17.35</td>\n      <td>772371.0</td>\n      <td>601688</td>\n    </tr>\n    <tr>\n      <th>729</th>\n      <td>2020-12-31</td>\n      <td>17.61</td>\n      <td>18.01</td>\n      <td>18.22</td>\n      <td>17.61</td>\n      <td>1480287.0</td>\n      <td>601688</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import tushare as ts\n",
    "ht=ts.get_k_data(code=\"601688\",start='2018-1-1',end='2020-4-7',autype='qfq')\n",
    "ht.info()\n",
    "ht.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABjS0lEQVR4nO2dd5xcVfn/32f6zPaabHohhZCEkITQCaE3QfyK6JeO0kQUREVFUfypWBBFEEUBgS8iKFWQKtJ7gASSkEaySTbJZuvs7vRbzu+Pe6dtyW52Z8tszvv1yisz55Y5d2b2M899zlOElBKFQqFQ5B+O4Z6AQqFQKPqHEnCFQqHIU5SAKxQKRZ6iBFyhUCjyFCXgCoVCkae4hvLFKisr5ZQpU4byJRUKhSLvef/995uklFWdx4dUwKdMmcLy5cuH8iUVCoUi7xFCbOluXLlQFAqFIk9RAq5QKBR5ihJwhUKhyFOG1AeuUCgUfUXTNOrq6ojFYsM9lSHD5/MxYcIE3G53n/ZXAq5QKEYkdXV1FBUVMWXKFIQQwz2dQUdKSXNzM3V1dUydOrVPxygXikKhGJHEYjEqKir2CvEGEEJQUVGxR3ccSsAVCsWIZW8R7yR7er29CrgQYqIQ4iUhxBohxGohxDcytl0phFhrj/+qH/PdK5FS8vjGx4kb8eGeikKhyGP64gPXgWuklB8IIYqA94UQLwBjgNOB/aWUcSFE9WBOdDTx0raX+OEbP2RT2ya+ueibwz0dhUKRp/Qq4FLKncBO+3GHEOITYDxwMfALKWXc3tYwmBMdTXQkOgBojjYP80wUCkU+s0c+cCHEFOAA4B1gJnCEEOIdIcQrQogDezjmEiHEciHE8sbGxgFPWKFQKIaSm2++mblz5zJ37lx+97vfUVtby+zZszn77LPZd999+fznP08kEgHg/fffZ+nSpSxatIgTTjiBnTt3AnDUUUdx7bXXsmTJEmbOnMlrr72Wk7n1OYxQCFEIPAJcJaVsF0K4gHLgYOBA4B9CiGmyU482KeWfgT8DLF68WPVvUygUe8wNT65mzY72nJ5zzrhifvSZ/Xa7z/vvv89f//pX3nnnHaSUHHTQQSxdupR169Zx1113cdhhh3HRRRdx++23841vfIMrr7ySJ554gqqqKh566CGuu+467r77bgB0Xefdd9/l6aef5oYbbuA///nPgK+hTwIuhHBjifffpJSP2sN1wKO2YL8rhDCBSkCZ2b0gUb9jCkU+8Prrr3PGGWdQUFAAwOc+9zlee+01Jk6cyGGHHQbAOeecw+9//3tOPPFEVq1axXHHHQeAYRjU1NSkzvW5z30OgEWLFlFbW5uT+fUq4MKKa7kL+ERKeXPGpseBZcBLQoiZgAdoysmsFAqFIoPeLOWhpnO4nxACKSX77bcfb731VrfHeL1eAJxOJ7qu52QeffGBHwacCxwthFhh/zsZuBuYJoRYBTwInN/ZfaJQKBT5zBFHHMHjjz9OJBIhHA7z2GOPccQRR7B169aUUD/wwAMcfvjhzJo1i8bGxtS4pmmsXr16UOfXlyiU14GeosvPye10FAqFYuSwcOFCLrjgApYsWQLAV77yFcrKypg1axZ/+MMfuOiii5gzZw6XX345Ho+Hhx9+mK9//eu0tbWh6zpXXXUV++03eHcPqhbKMKBuVBSK/OGb3/wm3/xmOl+jtrYWl8vF/fff32XfBQsW8Oqrr3YZf/nll1OPKysrc+YDV6n0w4BmasM9BYVCMQpQAj4MKAFXKPKXKVOmsGrVquGeBqAEfFjQDCXgCoVi4CgBHwY6W+DXvnot33nlO8M0G4VCka8oAR8GOgv405uf5pnaZ9jQumGYZqRQKPIRJeDDQFLAdVPnM499JjXeFm8brikpFIo8RAn4MBDTEwBEEnFq22tT42pxU6EY+fz4xz/mpptuGu5pAErAh4WNDUEA1tS3ZI0rAVcoFHuCEvBhQDOtOggJM5I9
rqJTFKOEuBHnW698i0+Dnw73VAbMfffdx/z589l///0599xzs7atWLGCgw8+mPnz53PGGWfQ2toKwO9//3vmzJnD/Pnz+eIXvwhAOBzmoosuYsmSJRxwwAE88cQTA56bysQcBkxsHzjRrHFlgStGC2/teIvnap8jrse59ZhbB37CZ74L9R8P/DyZjJ0HJ/1it7usXr2an/70p7z55ptUVlbS0tLC73//+9T28847j1tvvZWlS5dy/fXXc8MNN/C73/2OX/ziF2zevBmv10swGATgZz/7GUcffTR33303wWCQJUuWcOyxx6YqHfYHZYEPA4a0LHBdKgFXjE52hHYAUBmoHOaZDIz//ve/nHnmmVRWWtdRXl6e2tbW1kYwGGTp0qUAnH/++ak0+vnz53P22Wdz//3343JZdvLzzz/PL37xCxYsWMBRRx1FLBZj69atA5qfssCHgaSAG50s8ISRGI7pKBQ5JyngFb6K3JywF0t5pPHvf/+bV199lSeffJKf/exnfPzxx0gpeeSRR5g1a1bOXkdZ4MNAygJXLhTFKOWDHZbvO9+NkqOPPpp//vOfNDdb/WtbWtKBByUlJZSVlaXao/3f//0fS5cuxTRNtm3bxrJly/jlL39JW1sboVCIE044gVtvvTVVzO7DDz8c8PyUBT4MJAVcYmSNKwFXjBY+3FGHKwARPdL7ziOY/fbbj+uuu46lS5fidDo54IADmDJlSmr7vffey2WXXUYkEmHatGn89a9/xTAMzjnnHNra2pBS8vWvf53S0lJ++MMfctVVVzF//nxM02Tq1Kk89dRTA5qfEvBhoLD1o27f+Xy3VhSKJMJpCXdUj/ay58jn/PPP5/zzz+9224IFC3j77be7jL/++utdxvx+P3fccUdO56ZcKMOAQ+s+41JZ4IrRgnCGgdEh4CMZJeDDgCG6b3CkBFwxGjClmbLAY3psmGczulECPgwYPYxroV1DOg+FYjDoSHQghLVQpyzwwUUJ+DCg92SBxzuGeCYKRe5pjbWmHisBH1yUgA8DPVngYU0tYiryn7atbwLgMJ1KwAcZJeDDgNG9Ac72oLLAFflPomE1AAFDKAEfZHoVcCHERCHES0KINUKI1UKIb3Tafo0QQgoh8jtndggx6F7BjR5tc4Uif0gu0peaBlEtv+PAa2trmTt3bpfx66+/nv/85z+7PXYoys72JQ5cB66RUn4ghCgC3hdCvCClXCOEmAgcDwwsoX8vQxfgkrKLL9xEH6YZKRS5Iyng1TLKrgRIKRE9rPvkKz/5yU+GewpAHyxwKeVOKeUH9uMO4BNgvL35t8B3ADloMxyFGEJQaJpdxk2pBFyR/+j2HWaZYaIhiRvxYZ7RwDAMg4svvpj99tuP448/nmg0ygUXXMDDDz8MwNNPP83s2bNZtGgRX//61zn11FNTx65Zs4ajjjqKadOmZVUxzBV7lIkphJgCHAC8I4Q4HdgupVw52n5dBxsdQYEpCTqzx40+WuAvbn2Rre1buXDuhYMwO4ViYOgOyy4sMyyX4EmPnsTMspn89qjfEnAH+nXOX777S9a2rM3ZHAFml8/m2iXX9rrfhg0b+Pvf/85f/vIXvvCFL/DII4+ktsViMS699FJeffVVpk6dype+9KWsY9euXctLL71ER0cHs2bN4vLLL8ftdufsGvq8iCmEKAQeAa7Ccqt8H7i+D8ddIoRYLoRY3tjY2N95jip0ISjqZIG7peziAzeliSmz92uMNHLVS1dx8/s3D/o8FYr+kPwWl9nf8aZoE2/ueJMPGj4YvkkNgKlTp7JgwQIAFi1aRG1tbWrb2rVrmTZtGlOnTgXoIuCnnHIKXq+XyspKqqur2bUrt7kefbLAhRBuLPH+m5TyUSHEPGAqkLS+JwAfCCGWSCnrM4+VUv4Z+DPA4sWLlasF6wte0EnAPVKim9kW+FlPnUVDpIFXznolNbYjvCP12JQmDqECiRQjC932qJYZ2d9xn9PX73P2xVIeLLxe
b+qx0+kkGu17ZE3nY3U9t27SvkShCOAu4BMp5c0AUsqPpZTVUsopUsopQB2wsLN4K7rHEHRrgWsy2wJf27KWllh230yVJKEY6Rj29zjpQkmyva37GkD5zKxZs9i0aVPKKn/ooYeG9PX7Yr4dBpwLHC2EWGH/O3mQ5zWq0RH4ZPbNiEdK9D4sYmYKeigRyvncFIqBkryTLO9kpKxraB6O6Qwqfr+f22+/nRNPPJFFixZRVFRESUnJkL1+ry4UKeXr0EPgcnqfKbma0N6AIWCMYbBPIsFGjwcAlwQ9wwcuMwTeMA2cDmvFM1PAw1p4iGasUPQdw27OXdrJAheO/Ms0njJlCqtWrUo9/9a3vtVln2XLlrF27VqklFxxxRUsXrwYsOLAM8k8T65QDtShxjQxAJ8peWx7Pb/ZZS3sTtc09AwXSqY4dyTSGZotrZtTj1tjQ5O5+Wnrp1z21C/Z3j76LChF7tHtqpqdfeAJY3RWJvzLX/7CggUL2G+//Whra+PSSy8dstdWAj7EmKaGFILkW398JMrHm7dSYRjopL/wzbG0WAbjwdTjlrba1OO22NC4UK767/d4o/l+rnnq70Pyeor8JulC8UvJ95taeHC7tTQWzvOszJ64+uqrWbFiBWvWrOFvf/sbgUD/QiX7g+rIM8QYmmWFmHiyxt1SphZ/AG5898bU4ywBj7elsjiDQyTgccNaLA0OkcWvyG+SAu6Ski91hFKOwUg/Ft1HYxbn7pByzwL1lAU+xOh2VpoprPCiHbKc/018H7ckZYE3RRp5c/ubqWPaE+2pxy2JdiZq1h9I+xCVn3U5rN95Q47OW2BFbtFsH3gyT80JuE2I7mFzB5/PR3Nz8x6LWr4ipaS5uRmfr+/hlsoCH2IMW8ANW8B16eT2ay/jrntuTwn4m2/dhERypOcCXk3cQ1s8HX7VoofZV9fZ7HHTFh8aCzxp/+goAVf0jmZaNrcrQ3h9UhLbQwt8woQJ1NXVsTclAPp8PiZMmNDn/ZWADzGG3bg4KeAAJQU+3EiMpAW+/R0A5q5/gVenQMxe/JFS0mImUhZ4W7ydocDVtgFcAoP8rmmhGBqSLpTMShF+UxIz9kzA3W53KsNR0T3KhTLEpF0o1m3ShPIAwuHGLSWmkMy7dx5BpxO3lPyvsIQ82VfwhEdOwBBQo+tUJRx81Pze0MzZjk83iNEWb+POj+/skuKvUCTRTB2HlLxoLEqNBaRJfJRGoQwnSsCHGEO3XSgOS8AdAA4Hrgw3X9DhoMww8NuJEMlbz53hnQCUmiYHhRysb1s5JNmYcXsRyZRxfvbOz7jlg1t4t/7dQX9dRX5imDpO4Jf6WamxgDRIGKMzCmU4UQI+xKRdKNkLFS6ZXmlvwqDENHEDDinZ1KkAjldKanQdiczyjw8WWlLAiaZi0hNG/iVlKIYGXRq4pCRQWJwa80mJvocuFEXvKAEfYvSk8Ins5QdnhoBv1cOUGiYC64vf1m4JuN/lp9KA48IRKk3rPEMh4PFUGFeEtqjlTglG1O2wont008Ap4dJj90+NeaTENNUaSq5RAj7E6LYLJeYqhwkHwum3AeDKqFaw1YxQartPfFKiJ9qRUhLTY5we0XACY03LmtkVbmWwSSQFXESJxq151Y3CwkSK3KBJAycgPQWpMbcEw1R3bblGRaEMMckvsXR44aJ0T71MF4oE3IabE+O/wG/+Hk0PEdWjSCQFdoxtjYwAJezqyK5WmGuklCkXiiSGafvqmyLBQX1dRf5i2Ba4y+3h964LCUVjeOQLGGjDPbVRh7LAh5ikD1yQ3ZXD2emj8OleKiur8UmThBknolsLQAVGgo/NKRTbFnrjIAtpIsNqMkWcZIG55oiywBXdY0gTJ+B0OHjIdRqfyMl4pMRQLQNzjhLwIUa3LWiHM/vmx9Wp4GOx5ubLy/bFJyUJM0HEriNRZOp8bE6l2C4U1BwdXCFNLla6pCTujFIbs6JP2mLBQX1dRf6iSQOHBJdTUBpw
o+GyS0UoAc81SsCHmKQLxSE6C3j2R1Gge/H6CvCZkoTUUhZ4wJRskuNweSsREv615R6e2PjEoM03nrBed4yeXRpUhjYN2msq8htDmpYLxSG449xFfOmQ6bglmJ1aBioGjhLwISblQhHZxaw6C3hA8+H1B/BJiYaWKi8bME2ieIlNO5Fi0yRmRPjBGz8YtPlqdh2WMUa29RTRVS1yRfcY9iKm2+lgQlmA0xdOsVwoGLTF21jXsm64pzhqUAI+xCRrJTscndaPHdkfhVsLEPB5LReK1IlEmgAokJKo9OAaO4cqY/BvSeN2wawyPdvFEzVVNyBF9+iYOKTA7bS/M04PbiSmMLjwuQv5/JOfH94JjiKUgA8xaQs8exFTF1bliCXRGP/Y1kQCLwGPE490oAmDcIfVzDhgSpzCxFdYSqkx+OnsCc0S6gI9e74RlAWu6B5DmjgAV9IocXpSFviG1g2ASgTLFUrAh5hkuymHI1sQNfvLHjBN9tUjdBCgstCLVwoSGMTCVjKPT5qUEsIdKMkqFjRYaHbFQ69mZY66JJzZ3kFEqEQeRfcY0rLAXSkL3I1HSqRI14vI7DKl6D9KwIcYw3ahiE4C7hbWR5FsBKu5iijwuigyHUQcOrGIFe/tk5L3zZngLc4q1zlYxG0L3KkXAjBB06k0TCIOA81Ucb2KrhhIHNLygQOWC6XTVzWk9c8Fd9uHt/HWjrcGOMPRg0rkGWJSYXmO7EXMJZqbbzU38vkOWzD9Vh2JIunAFNAUtzIuT4/dyHY5EbxFFGV0/U4YCTzO7HPmgnjCcpXoejE/btzAodEYrwX8SAGN4WbGFY3N+Wsq8hsDEwcClyNtgbs7GRuhRP8E/I6P7gDg4/M/HtAcRwvKAh9ikgLudHizxj1S5/z2DgrsL7q7sAyAEmn9xtYnrHjvqGn32/MVc21zKw7T+gjbE+1c+sKlvFb3Wk7nG7ejXyJmEf8TClNjGJTYa6ebWutz+lqK0YEhpb2ImbTALRdKJh3a7l0oL2x5gXOePierG0/cULVUOqMEfIiJ2V9CpzO7GqHHyF4UDBRZAl5qhxvutAXc4ynkR5+ZA95iKk2TA3bNAmBlw0re3PEm17z83dzO1y5XGxclqbEt+gwAPm3dmtPXUowOdCSCTB+4p4uAd7bAGyIN3PnxnSnB/vYr32ZlY3a55PYhamCST/Qq4EKIiUKIl4QQa4QQq4UQ37DHfy2EWCuE+EgI8ZgQonTQZzsKiKdcKP6scX8025otKKkEoNxp7bcz0YHHlJx6wBQuPGwqeIsA+Iz4AIAXt7wEQDhUQi5JNpOQ7rLU2NETJgGwsXVzTl9LMTrQsSzwzCgUd6d9Oi9ifve173LLB7ewIWhFqQRc1p1mZrXNzN6wCou+WOA6cI2Ucg5wMHCFEGIO8AIwV0o5H1gPfG/wpjl6SAm4c/eNS0vLygGo8lgCvssI45USp8d2vbis/ydJ60v95GYrG7NzjZWBkmrn5q4AoE5WUllQSqVusrVjS05fSzE6MJA4M+PAHa6UBW5ErB//zouYSUFPtmPzu63vfTAeTO2TKeBqAd2iVwGXUu6UUn5gP+4APgHGSymflzJV3OBtoO+dOPdiUouYTv9u9ysvrwagymtFfyQw8UiJ15W97rx/LM5hkfRtpnDlNsEm6Xf0BCq5TruIm2puxuMvZqqmsTOiXCiKruhIhHSkfeBCIOyg12PkWqCrC0XYtYCSAp6ywBMZFniGC+WiZy8anMnnGXvkAxdCTAEOAN7ptOki4JkejrlECLFcCLF8b+ou3ROJZMPXThb4O8f8kzPj16eeV1dVAVDoLcCRXNiU4HGlP7L2E2/FA/xxVyPf3eFkbHAyOHMbXxu1BdznKeDMy37Ezy48BW9BMZM0jdbEzpy+lmJ0oAlLwFM+cCDhsATcLyUu3Z9qD5hE2CWLkyUj/K7dW+ArGleoZCD2QMCFEIXAI8BVUsr2jPHrsNwsf+vuOCnln6WUi6WUi6ts
UdqbiRsJXFLicGZHoRRNP5j35Gwu9/ycB+XxVJdbPmeHx4/PXv/xSPC60uk7xfseDYAAzo5v5gvmSnBEufLFK1PVCwdKzIjb8/WxYGIpBV4XTm8hE3SdmNmu6looumAkLfCM8hBxu3SEW0oWaK1sblmbdYzDtApdRZI1f9yWBZ5pdXfuPrW6eXXuJ59n9EnAhZX3/QjwNynloxnjFwCnAmdLOQRZJaOAhKnhlrJLOdk544p59dvL+N13LmPpN+/D47aF2uXFb3ZvgeMvzzrH7IRlkbxc9zLv73o/J/ONGwl8nefrKWCcbt1JqLoWis5ogAMHDkfaAtdSAg5TNI1NwU+zQgRFi7UgHm7ZCHRvgQc7RaEEVUnjPkWhCOAu4BMp5c0Z4ycC3wFOk1KqdtN9JG7qeKVEOLvmUE2qCOB1OakpyfCPu/wE7C+6S4psAfcEso5fFEvHyTZGc+OuihpxfKZEODMWRz0FjM0oL2vKwa/JosgfdAHI7EIP+xqWmC+LRJis6bSbiZRLZEXDCj4WlvER1uz1HLvkRGss3TKwIx7GaQr+b4cVsdWu0vH7ZIEfBpwLHC2EWGH/Oxm4DSgCXrDH/jSYEx0tJEwdj5SIztUIe8ITIGDfXrokeF09f2QBKZkaPg2AbR3bBjxXgLip4ZMmDlemgBcyK5H2P6q6FopMdCwLPJPZ0sPy2q0sjcYotH/wG0LW9+bcZ85N7Re2695rO1cCsGrnp6ltsa2vUyx1JmrW3d+uULDLa79W9xq/fu/XWWNvbH+Dn739MwDuW30fD619aABXN7LoVUWklK9Dp3YxFk/nfjqjn4TU8UhwOvu4/OApoMD+wrukI9sC74YJkekYYyflTMBjpu1Cyazd4i+lQEp+2tjMD6oqaE+0U+LNjj//pPkTTGmyX+V+OZmHIn/QBDg6S4vTjVfC48ahJMz1AASjXStaRmxjIK5HwS1YU78htS3RvI6AW6RKSNR3BLsc/9UXvwrAVQuvwm3fNX77pavoMGKcve/Z/Hq5Je5nzT5rYBc5QlCZmENMwtRxS4lTdPeb2A3ugpQP3CUFnk7C/9biWzgvcS23TrC8W7eHrsbR2E5tW25C/KKGhk9KyHShTDgQ3IHUH1J7oh3N1FIhYACXvnApX/z3F1nfuj4n81DkB1JKDCGgU61Mr9eKuopJD8J2r3Qkop0PJ2zHh8ftuj7SnV64DLs8+KWJB3Cbu28nmBnlMj1s7feZxz+z5xc0wlECPsQkpI5LkrXAs1s8gZQP3GmKLhb4pqplvGruD77i1Nj4WIyt7VvJxbpyXGp4TZntQnE44erVqcbKDaFWlj64lNMet9w3Ukpa7eJbr24YvHZvipFHsuerkNkWuLAF+bTF0/DY1nk40bUkcTI+PGpLk+a0ulG9V/8ebU4XAduYKTRN2uM9u+4y70CLO0V8gRVv3hJr6fN1jVSUgA8xCWlYLpS+vvPuAvy2UDqkIyuMECCmWdukpyg1NllPEDXCXcKu+kPM1C0XiqtThmegHJfHCgv9uGE9HVpH6o8ms37F62ufGvAcFPmDZn/2DtGpWr3LssADgUIq/NbjUDzaZQH8pdbVtMZaScj0IvkPX/8RFz13ER84NQL2/iWmQTjeSmektAyj2ra0gHe40zkXRtsBAJz++OksfWhpToyc4UQJ+BCjSQOXFDj66kLxBPDbXzJHNz7wg6ZaoYSHzJmaGpthLwTlwg8el9aiq9vZtX1Eodvye/9lzW9TY4Zp8PX/fj31fKcK9dqr0Oz8gy4+cH+p9b/bj8deTwlr0SwreKKm0WHEeHDtg+iYlBuWiL+w9bnUPgFT8poxl7BD8En4bT5q/Ci1zRJj6+9qW/uO1HiHqbMoGuN3uxq5Nm7VEt/aYbkYc2HkDCdKwIeYuDRwS/ou4O4AbXZCRLnm6RKFMnd8CbW/OIUDZ09Jje1rWJZJLkIJNdP6wXF1c8tQ5K/oMvZM7TO8U28l6lbrOm0uFWK4N5G0wIXo
JOCeAut/lw+v7U4JazE27rKScX7S2Mxj23ey0D+O17e/jo6kWu/axb7DKOGusT+g0S4p8a9P/5Xa1p5oRwjr+9YUTf8whDAYr+scE4ky1cyOJa+P5HdJZCXgQ4yGiUuCs88+8ALabPHcJ+zvOYzQ5eWJqst40jiYQttPuLp+4AKekAYOSVZadJLiwmq+2hrMGsusRz4joRF2du/rVIxOdDuOu0tRNTuzErcfr10LP6rFWFe/CoDFsRheCRXCS1uiDU1IqoyuAu6ULr7/+cO5YoeVK7FqV3qxPtOabo42px6HMDENH8sDRzCmU3Pu+rAScMUekJAG7j1xobgD/LCphe83tVCi+fC5u7oykhx10U/Z9+ATKbB95ut2NQx4vh1xDSciKy06iWf8XC4PtjMpkvYxbmpI39LOSFgLWmsbtw94Hor8QLPdd46eLHBTx2dX0uxIhLhz/UO4pGScbW13BBO0x9vROlngC2OWEWBKFz6Xk6pIFcs6dDYG06UcwhlJP63xFnt/k7CAZqOaWV9/jIJxJ3JJa1uqANwnDbkJtx0ulIAPMXFMXFLskQU+UTf4UkcITXqoKuq6op6kxO9mnxO/RsK0/lhc7oGX3DSFRGQ2qM3AecTVRGf/D9UibfnUdqQtoqqEZYVtaN7R5VjF6ESz68cL0am9X1LAExF8dlTIv7ffTVC2sigWp3GsVddnTmSdFZYqoMw0rDo8UnJWuxWdstOj43M7aJcFLEp0EKcl5UcPN1r1Vcp0SZu9wBnRIkghcOKj0OsicfyNXBls4w+7GnFKyepduQm3HS6UgA8xUWniNfcgCiX5xQfaKNytBQ6A0817YhEOKVMxtQPBFFZtZ1cPPzj+GUvxZEQSxBwO9o/FuWVXI9VYC6zb2nb1+/Wf2fwMx/7zWJ6rfS7vIwb2BtIC3oMLRQvjs+ucGIQpD1fwl/oGdi65FoAyQ8OQBoYQCOmixDCZpOkcHrUs5smhMrxuJ20UsK+dDby22RLuiG4lBk3V47RrrUT1KG0Ry43odRQghGDa+BrCk5bhxApF3NWe3xVSlYAPMVFMfKZjD6JQ0gL+nLG4T4ecvHA6BaZMleYcCGaqPVYPX5WCKjr/pEzUdY6ORCkttIr3N0Wb+vXaz9U+x3de/Q67Irv41ivfYvH9i0dk7G5rrJWLnruIXeH+/1CNFvRUGGEnAa/e1/q/bAqBjLC+o8wtCMBTYP3YF2c06o44ypmsa+wfj1NsSj7cvJXpwYn43A7OW7YgVc7hw/r3AAjHrDvBiZpOwoyx5G9L+Oar3wKg3JlecC+44BGMk35NoSmJ6q28V/9e3hoHSsCHkMc2PIYmJG5zz1wo8eN/xW366Xwo9+nTIS5vIYWmSVQfeI0xU2C3x+phvgVVTNUsV82cuFVMq8iw/gjHVc3EISUtsf5ZOd965VtZzxNmgmc2vtKvcw0m//r0X7xX/x7HPXzcXl8hL2mBOxydXCizToKLnoNFF+Jxp4u1Vdl+bm9RVwGPO8dye30jP2i23CEuoI0CvC4n5RXVlJiSg6NRnvj0CaSUROziWIszirqtsTOBKz3j0nNxOHEGyikyTbYYH3DRcxfxz/X/zM0bMMQoAR8ipJRc/6bVsMEjHX1PpQe8h17KTfpZLJnaNWyvO4RdPyWmD9yFYohOHcY7U1DJN1qC/G5XI//TYVn8IXvBs6ayjHLDpC2eu9vUVdsHfleRa5K1qyWSrzz/lZzVYs9HNN2yirv4wAEmHQxC4PSkLfAqwyAqPfgDBTy435+yBHyC8yAKpLRKOdgctmCu9cCOKz88EmNntJH2RDthu9zsIdGuUU9lhZOzB7xFFGa81iMbHtmj6xwpKAEfIjJ7+HlMkepA0leW/+BY7rtoSd929gQoNCUJY2BiJ6XEFCCko+c7hoIqPMAxkShj7RrhQaeD1s/ej8vjp8owCGnN3R/bD9yukdcL0ZfRXWld6zru/+R+Lnz2Qi7/z+V7dJ7na59n3r3zsjrP
mNLMq1C3ZBy409HzYrtwpbdVGgYxPBR4nLRVL8GnW9Er57S1o/knwyFfg1PTiWInHrzAemC7FktsEd7R0Zxa86kwDK5pbuWOnQ2c3hFiSTSGr7Q6exKdBDxf75yUgA8RmenlbtPZdxeKTWWht/cFzNQLBCgwTTRzYAKeLE4lpAPD7MFHaPfsBPCGx3NSKMychhn45pyMcPupNAzCRv/81mXesi5joRz49XNNZhEvgC3tW1i+azmvb399j87zl4//AkBtW21q7J7V93Dcw8expT0/Gkjrdgu+3Qk4GQJeo+uE8RHwuLjwsKmMp4AHttfz7ZYg0YLxcMLPYHFG/8uiMdb/djnmpIDv6mglooXxmSZPeE7ngvYODo3F+GlTC3fVNzBv5vTsOXiLKMyw7BsiDRhm17jzkY4S8CEiU8BdpqPvUSj9wVNgCfgA+2wkew46ECT0njMqPx13Kj/VzuYjcxa/amwmoBXjczvA7WecrtNhNvVrkajYmy7QNc6uAd2xmwJGw0Wk01pDfy1mw67/kblQu7x+OQCbgpv6ObuhRbMFPFm8qlucXr7T3Mo3WoLsm9Cok1V4XFaZiAq9gXmJBA4g5q/uemyhLeCTDiGx7MeU2OstDZFWXgnV4peSjlldu0TtM3n3LhRd6jlrgjKUKAEfIjpb4H2OQukP7gA+KTHkwNwN8VRlOQexbtKak6xc/EvuNE5hu7SKWy2s8VguIpePKZqORqxf0SMlHqvWym92NfLgjnq8phyRFngsnB1lk1nKVDP6/hkkf+QyfwCK7CJlHdrI++HqDs3+0Xc6fD3v5PJwbnsHX2lrRwD368emNunH/gSAJlnM9raM9y5ZUTBpvQuBZ+nV+Ox6PC/XPc9mrY1Wp5P24pkARERGx6pO3avwFqfKIXuTbphQ/uUrKAEfIrItcCeubjIbc4YngEdKTPTe990NMS0t4HGtZwv8mNljOGhqOQfNmw3AgnJb7N1+ptjnqG2v7fbYbR3bstpmZRLR4swK+Tk+EqVk+vEUSJNIDmLbc020fmXW87qOutTjgx84uM/nSd7x7IqkwxGTAp4vPlrNsBYQnd2UcE2Rse167XyeMg9JPXcd/g1WXLiRw+O30BRKR5Pwtffgwme7nKrQa0WvrGqxesDuE9dxu91w4TMEvv5Wz3PwFNJsF2g72s7K7G+463CiBHyIyBRwh3RR5OtjS7X+4C7AI8FkYBZ4VE8KuGC/8cU97lcScPPQpYdw0uEHWQN+23ft8qUEfENL9y6Akx89mSMfOrJbF0tEi1FOhOaapThmHkfANNG0YP8vaJCIudypkr9gRaMkSZiJ7g7plkbb8s60wD12VEdTZOBlEYaChG6JrstZ0PNOY+elHu6S5dxwWnbXpnkTKzn3iNn89LNz04Nlk2HyIXSmsMByqbQmGhivm9yxPWRV7Jx8KJRNgS8+AKf8puscXB5OD8U4KBrjy0Fr0Tgfo4eUgA8RmQIupIMSv3s3ew+QMXPscp4DFHDNEo/KwgCHTq/s/YAJi+B/7oITf249d/up0Q08puSFDR/v9tB1reu6jEW0KBUyghi3AAKVFJoS3WjvevAwE5V6quRvd/QloUo3daK22DdlFGKKffoiALuaPhngLIeGRMoC9/e805TDUw878HP+oVOyNjsdgutOmcM+1UX0hq8oHd+9KBohLr3ZBd9mnwIHfqXbY2e5x3BnfQPVdtGs1lh+uKkyUQI+RMTsBIfL2k2Kde/gWuC+EkKecejCHFCGWdy2wD3OPZjrvM+Dz+6PWT0HJzBJ12huW73bw7pb+NP0KD5pUjhxLgQqCEgTwxx5VlJMj+PrJkrnpJAl3LXB3ot5hTMWZ7cH0z70qB1+2qGPPN9/dyQtcKdnNxa4EDDndGt/ObC/A1fJWA61XSBzg1Vcql1Ngbdv0VqBs/+Ph+UyNhjWAmdzRAm4ohNSSl7f/joh23d7UnsTzY6KnlPTc4QLF1IIdNl/P3jMvn13du6u0ld8
xXDNeqZoOkF99wtEmeU/k+hmHK8ET/VM8JVYse0DjKwZDKJGDL/sukYw067GuKah94JJoYzrb4jWpXzeUdsd0x4fedfdHalFTNduBBzg9Nv5oXYB78uZA3o996wTuH1XI89v3c67nMkXTj2ZE/er6dOxYuw8/lFzLRGzEKeE1qgScEUnXl7/GJf/53Lu+vB2AIrMBDvckwb9dd12Oc/kwlh/iNkWuNO5m4iC3iioYrxmEBS7tyA3te7sMmZgdQOiYjr4Sig1DBKiayPc4SZqxPBJyR31DVwYTLt45tulBdY3916yNGQv5E7UNOLoHPHQEda5hSXgm5u7X+gdacTtMEJXbwLuLeRr1/6St79/3IBeT0w+hG8mvsr7iQO58epLueCwqfg9fTc4rjtlX+J48ZmSYGzkLZD3Rq8CLoSYKIR4SQixRgixWgjxDXu8XAjxghBig/1/16wLBZ9ueh6A2oglUD5T0uiburtDcoLHFvDkH1R/iOdCwB0OvHgxhOzyY5Lp3rlv7R1c+OyFWdt0YSLxgrcIfCWUGyZxRwLT7DkiZrDRTI1/rv9nVmZtzEjgNyWHRmN8szXIt5tb+UljM4tjcTym5MHNN/PI+t2naoftMMuJWvqOaVd4F2H7PTJF/3+Ih5K4kbBKwLp7/86MKfYxpngA3y2bY866kqmX/wNfQc8L7T2x/8RSDpk1Hr9pJYlt69jGb5b/ZkCGz1DSFwtcB66RUs4BDgauEELMAb4LvCilnAG8aD9XdGJjIjv+2S8lwcDgC7jbzlSLDKAbTjx5O7y7kLA+zcWKwU12HE+SKYIAy3ct77LNwF4M8xZRZhoYDpPmyPBZSs9ufpafvPUT7vzoztRY1Ezgk5IvJn7AP/UjOa+9gzNCYd4zZ5OwM25//NaPd3vekO0yccp0fsCKxhWEbdeMdAwsJHSoiBsJq4eqq59ut35w2v7j2G9cSb+Pd9i1gyJ6hMc3Ps49q+/hTyv+lMMZDh69CriUcqeU8gP7cQfwCTAeOB24197tXuCzgzTHvCWiRXi1PR0+5zVN2mUhuqdwN0flhqQFHtb6L+AxLenP3E1EQR9wu6xogpZotvAmF7wySUZsJO8cJLaF5nBSbLfpqg0OX0idx84wfG/Xe6mxDiNOkWnytrkvtxifS41vqTkptZAJsKVlQ4/nDdvtwMbG0gknDZEGNjot4TYd+ZHmnTA03JLBzXPIMU6vnwLTJKZH8NhVFB9Y+eYwz6pv7NG7LISYAhwAvAOMkVImHZf1wJgejrlECLFcCLG8sTH/UlUHwhtbX6TDjLPIro5WbRgUEeWQaX2rKjgQkp2/Q/H+C7hm9zccqIB77Wy5xnAwazxuL+z+b1sH4xOWq+DgBw7GlGZKwE1H2pdaIqx5bAkOX93tZN2Tja0bU2PtZoJCE0DQIdMC/IXTP8v/a2rm79utCJvnNjzW43lDdgGrmrYJPFxn/Vn96r1fpbZrjvxoDh03LQF399S7dQTi8gas8stGhHCrZXB1JHruVq+bOqc8egr3rb5vqKbYI31+l4UQhcAjwFVSyqxgXGk5M7uNV5NS/llKuVhKubiqqmpAk803aj/+OwAnha0IApeE5uJ9+drRMwb9tZOWRGgALhTNDm1zuwco4Ha2XHM4O4ZbS1jWaXXCw3nt6UW6jkRHygcpMxJCylyWOG5s3Tyg+QyEZDhoq92yS0pJh6kRMASXHjmNG848KL1z9Rw8OJibSDArnuDdXSt6PG+b7ULZYkxkH61r/L4u5IgttnTLB7ekardrSQHfw2Jtw4nLG6BQGiSMKKHNLwPgdQZ73P/uVXeztWMrv17+66GZ4G7ok4ALqz/SI8DfpJSP2sO7hBA19vYaID9SxYaQLdEmqnSdmYn0gsiLC24ZktdOWuBhrf9RG5rtznC6A73suXv8PisJKNiRbTnHbQFfY86kIGNBMxgPErN/PKQzncwx0VuG35Q8XvuHYcuay0zIunvV3UT0CAYSnynwuZ18
dtGU9M4uD+KbVgLOrESClc3dZ6M2RZu4pfYJ3FLyqTkFJ2Rldp7f1t7ltUcSd358J8/VPgdAwtRxSQY9TDaXCLefQtMkboQI2TkPhqv79zqshbntw9sAKHQXDnsnn75EoQjgLuATKeXNGZv+BZxvPz4feCL308tvtsRamKTpTLYjC5a2OxCF3VRYGwQ8LssCjw7AAtdtkfS4ewkJ64WAXUGuoyM7VDBhJ6eERBlR9/jUeDAepDVYa722K+1uqi6q5tZdDYTNEDe8+NcBzam/ZIrob9//baruid904HVn/DkV29dTNJbnS89iRkIj5gh3W9Pk3ndvwoXg1w1NBKUVSZGslDc3HmeS/f0JJ0Z2LLgpTRLSwCUF7m6aYI9YXH7GGAZRGaTN9t2bDqPbH8xgPIhEsiAWJ6SF2NrRe4z/YNKXn8nDgHOBo4UQK+x/JwO/AI4TQmwAjrWfKzLYIWNIrYxnEst4p3YbJ7U5CexBjOpA8NoulEh8zzP4TGny7Ve+zV07rEgL1+6y6vpAYbGV7nz7tjtSvu1H1j/C5168FACHw4sWmJbavy3eRmOb5Sbxu8emxkXVbA6KxSkxDFZufnVAc+ovnf+oX972MgAew4k3GXnxjY/g8vQi2NzTrmKa7RbZ3N7V/bNj1T+YkEhwTCTKuEorGrfAzuwsNkykad1NBeMjO055//v2R5OG3QQ7fyxwq+SDjsRkk5n+fOtDXYtbtdtdf46KWD+mb+94d2jm2AN9iUJ5XUoppJTzpZQL7H9PSymbpZTHSClnSCmPlVKOvG6zw4gpTZod4NQDnH7wvgSkJIG7700ZBojPLrsZ7YfVtiu8i2drn0XH8rl6BijgpSXpxKWt7ZbFkhlW53Z48XrKU8+D0WYakpatf2L6RHZj3Am6jsfVNfFnKOgs4O/UvwOAz3Sla3CUTU61/AIYN3YsE+xuRZmVClPndDhStVQmVpdzVvyHFNjhgyWmSatpvTfP1/4np9cyGMSTAp5PFrjbzzi7XHK9jOOyP4vNrV3LO7TZi5vz4wkqdIP73n9+6ObZDXn0M5lftMdaMYTAYQTwB6ywQY0htMBtAY/1wwe+PZRdu6PIX97Dnn2jtDJdTP+Tlk+61F12On34A+kgpmDHDupD9XhNE48/7VpJCvhETWezp5WGYajQF402U2bAERuPw0NJKhrFbbiyiyhl4i1OCcSObgQ8JkQqFd/lC7CBCSkLvNCQdBjW+//nVbeyrnljl+NHEq3oOKQjz1woPmr0dJx9skn35tau5R+SFnixYbJ/PE5jrGsRtqFECfggsLp5NfV2OJKPYpze9CLgUAm4z2XFT/cm4I9ueJSbl9+cNZZsSHCF4yweq9vJ9KqJ3R3aZ4rLajgxZN0JXPf6dZzwyAlZ291OH4GitIB/vGMt/9f0DiWmiSzMiFwasx989R1KsO4Ifv7Ozwc0r/4Q7dhJgalxu/MupF5Ca7wVISULjXq8Pd1dOV34pKRSN9jWqXY4QMThxG8Ltscb4IVvW6VzARyGj/JIKVe0BgH41cvPDcp15YpmkY8ulAATND0l4snl4y1tXYuQJfuVRvRSZicSRJ2ttEaHz7WVR+9yfrAjtIMvPvVFvvOW1YE+4CwFO47agRwyF4rfFvDobkqZbgpu4kdv/oi/rk4vCIYSIb7/+vcBuHDTr9lH03D7BpZ45PW4uazRg+hhwd7j8lNZNiX1/NnmlwGYmtBx+Tpl2FXP5vPSWghe07QuK7Tu3tX38vbOtwc0196IaqGUu0NGLd/0wlicSTKCtpu2c2D1f9we6ZoLEXa4CNjnnDy2goqSEjS7Y5NHK2T+lBrObrOicnaFgrm6lEEhJMAhHfnlQnG68AD/3raDw0IG32htw2tK6jq6uumCdsLVFmMy+yQkCHhx0wdDPOE0SsBzTDI+eHPIulUu8laBHUctkENmgRfYLaSiu7HAb1txW+qxburcs+oe7lj5x9SYV0K9a5xVGH8ACCFwVM0mmSW+
MJYdGeNxBygpn8jHm7dyqp256JTwk/o4vm7erwnlk/hSi8bOSB1X/vdKwFpzuGn5TVz8/MUDmmtvxLRIylpeaFo+0uMiEb6tXUJzuOe6Mx8cfAs+Kbv9PCJC4DdN/qqfwH7jy8Dp4ivBDq5uaWW+OZkJ1ZUUSImQEjnCinl1F0bnkE7ceRRGSDLfAfhT43aWRaLU6DqN4bQF/mzts/zy3V/y2IbHcUlJu1nCTFcNDgmvbHtlmCauBDzndG4PVhoYlxJwB3KPKqUNhID9mjG950XMHR3pL2gwHuQ37/+Ge9ZY2WU/b7RW4D1n/AEcOZhz9b58ozkIwMmh7DkV+YsorpoAwIF21qohoIwoc8Z1LVBUUFLBmRFrfq9tf40rnr6Ixm4s28EgqkfxSNhUchAH6TsREqZ1lPCs+1jOOrDnKpPT5x2GE0l7tKsAxwXsMqu5QT+fmWOsuPeF8QQXtXUQ8tTg8xfhAApNiWRkRaIkOw6d1pGel5CO/BLwfY4jVpqdXDfW0GlPpBcxv/3Kt7n/k/vZ2lFLiWHSRhGFlfuyMGqwvOk/AyoaNxDy6F3ODzKb9y6IxSkrHpsScJAE3IPYyCEDr8eLzzSJGj1bbO3tW1Mr7p2bDs+wa1mX73NgTuYjq/blK+3tHLrxeJZGsudUUVBGeWkpj522ipOiGg4pOSUU5vaiK5k9tquAi6lHMEPTeK92GwWmyTv1H1IX6ro4OBiEjThO00ljxYGc09HG33fUI4xi7jxv8W67LJWUVeCU3Tc5jgmJz+Hmwx8eZ7UDAxy2JzbuLsXvtxaki00TQc8p3sNBMjN1bEbT66KEF1ceZWLi9uE758GsoRrdIGJ2rVEPMMbQ+dicSmDcHC5uayZktKTCSYcaJeA5piVqCaFXuDirvYPC4nKw/dFDaYF7PR78UhLXe07kadciTLZX3F+rey1r2zZ9Au+e+rxVyjUHuMfOAeAO552MNQyeqNvBH+obuHvnLsYWW5maZyyciL90Eu9sqeOnjc3UjTm6+5PNOZ1Xqs/FJyWXBtuIO3XWNK/JyTx7o81M4DVcOALlBKRkv0SCHbKCmpJeyg14i3EByOx0eFOaxB0CNx7KCjxdDtPdAbx2a7wi08SqJzdySIZVjjHS11UeK8svCxygdCJU2705px3FWN0gJiJohoZhGggEQlo/0MLw8bx5IN5xc5hjZ1lvahmejvZ59i6PfFpiLXhw8N6mTZwajlBSVgl2KrpADll4lcfjwW9KYkaUG9+5kX+s+0fWdiklHZipLNHfffC7rO3PlH6FJYsPIlcUjpuFJtM/Xr4DvsOR0RgHxuKUFGUskpZNwSclcemluLi0x/MFp57CBnM8E+z5v70jXYrW7KY7Tq5okzouw4OrMJ0h+uSYrzKpopdyA04XDimQZAt40oJ1i+6t9w7dg7CrNhaZJrocWS6U5Py9huC7zS2M03Q80aohW+vJGS4vfPVNuGY9fOkhSkQRCNgU3EFICyGR6DErqcyQLl799jIc044iZhQhJLyzpfemHYOBEvAc0xxrpkxPkJTp0sqxYBe3n1LuR4ihEXDhdOOXJnEjxgNrH+D/vf3/sraHtTCmICXgnXG5+l9fuTuKCwvYItOhgrG5X6JOWpZ3eUFGvXF7wbRJllBV1HMd8pOPP4HQV97AU7wAgI8a0uF5LdHBcTPE9BhxAS7Ti7c4Hd64ZO7sPp7B1UXAkxasm+6vtUVzW+KCJeCJEdYTNGbPP2YGOLs9xHN1O2iXpVQWDqyG/LBRNAbcPsoDVvbwh9s302ZHnhxjWuWANdyUBNzgK6ah4iC71d/w/LAqAc8RD69/mOX1y3lrxzvMSqT9nNUlAbCbKxT1sdlqTnBYscdxs/vFlY6EdSs+uZvKdwBOd25rljsdgmZsf/bJN+EpquTcxPe4Xz+G0prp6R1LraQfr9Dwe3peL3A7HRwwqYzJVZZ4BrW0v3JV/eBYQ8k/ZIfuw19SmRrvbqG1W4SLdJSxRdQu
6OV2dC94zZobDrqMO/RTLAFnZEWhRO3aLjHSrraYq2TIXIWDxYQCK1R1bdOW1Od+QjjE9ESCec2TKfJa3815M6ZRahh0aMOzNqEEPEfc8NYNXPjchTRGd3FCOG0l1RT7IGmPD+KtfRccLvymJCHTAp4Z8tVufykxu29p5fAMrAJht3htofOXEfC42Cxr+IH+ZSqLM/zHs04CYK05CbObTu+dqZkwj1Lb/+qx9/9w19rcztsmGA8CIM0ABaXW3YQhBfuO7es6gRMpOgm4fU636P5zmFJTBW4frhN+SrUGIUcs1fRiJBCxo4EMR2l6MDCwzN2RwNTSCQgpeW3XE/zv0/8LwERd5/Ht9VTF/TjsRVoRqKDMNIjqSsDzls4+10m2VRv1jbE+6AL7dnufY4ZuUg4XfimJZnRx79DSC2Bbg58CEDFKU2N/qk+npjsGWAO8Ow468WzrQdmULB9pllupcga1Z7/BDa4rOWneWHrDWz0z1UdyUTyGU8La5sER8DY7VNHUAxSXWxZaw9TTqO5jX0eBEyOjbP6KhhW8YyceFTm6t+K/evz+AHz5iGlMNQuRgiFbsO0LoeR74kyLtrMg/wW8sKiaKsOgIZHuovRQ/CQ2mOMpW/g/6R0D5ZSaJjE9OPSTBIYmpm2Uk3RHJPEZguu0izjzjPNYAJZf7apVUFQzdJNyuvGbJtGMbvB1HXXMqbCiQW7/6A5cUuLUq/hsx3oeLyrk4Gg6YsXhHQQLfOH5MPVIKJ+Gdzd1lKfMmMtLP5rbt3NWzmSCrvMxXqp0g0kJg62hwakXEgxZmXmSYnz+AFy9hpqCql6OSuPAgZkh4Oc+c27qcaWz+y5N3kBa2GuoAhr5YNcHHDg2N+GdAyUctVxXhiv9PniKKnvaPX/wlzNB12lwWRL5+fYOVmoLmXzDPczIrHkTqKDUMEgMU3SQssBzQNJHlmSnMZ6/GcdSMXFWerB0IjiH8PfS9oFrGd3Mb/vwNv644o80RZvY2L6ZS4Nt6I6J/Kiphde3bGOFOYP/bN3Oc9u24/Lk3gJHCCifZj8UTCoP8N2T+roA2AOF1czRrPe10eVksq7Rmhiclmsh213gSroLSsaDq2voX084hAOjmzXsMsMg4Crt/qBMV5Z3InNiGv/d+t8+v+Zgk2zGLD3pu6Xi8m67K+YXgYpUHfbjwhF+1NzKOmNCKk4/c7+Juk6U9i5JfEOBEvAckPSNJqk3rBXsMX28tR4UbBdKJq9tf43bV96eKl51SDRG0DcRF1BiSp41DmSMYTBON7pNYc81r35nGZctnd77jrtDCI7xTGNBLM7/toWYqCeImo2D0iklkuwc7+mfi8CZSs/JXo+YqmkIdw/flYxuSHWlizgqGmZNy5rdlkgYSsL2d9/MqBo5Z3zZMM0mhwTKOSZsvcch3EyN3U+YbowafzmHRGMg4Il1Lw/tHFECPmA0U2NzW3aR/q3GOK49cXbXX+uhxOHKyo4bk1Eu88lNT1LqKmBOPIGjbHKq2Nb3vnZFah+fK3+iCHzTj+X/du6iJlzBeF3HFFqqJk0uidq1oN3e/gm4wIFhV/SKGWl31X7xBHF/D1ZrRhmD+oqDKTesn4CG8MjIyAzF2/GbJkbBuNRYd9mzeUfJRI6IRrkw2M7hTdVMrSziya8d3nW/iulMjrtwSnhwxeAWUusOJeADwDANTnn0FH7wxg+yxvWyfbj8qAFalgPF6ebc9nQT4ZsbmlheW4/HtO7hj3ZX4pCCgjHT4esfwFl/w1GVrgcxVFUTc0Hpsd9kYexPrJl9JTX2j9bTm57O+etE4x24pMTt75+P1ymcKQs8M5Lk4mA70aLJ3R+Uib881WqtObJ7AX9j+xs0R7tPBc8FHzV+xA1v3UCH1kGhaSILLRdKh6ucWX2OyhnBlE3GefFLXGxWs2Tp1/jXlYczb0I3uRFuP80TTmSMbqA7Wnh64xP8/O2fEkoMTVy4WsQcAFs7tqZqZ2eiFQzhYmVPOFx4M7wI
BaaJV+q4MUjgYGbdh/zbPJgxk/aB4hooHpd1+FC4UHKF2+ngnZ+dhSvSSMPvrFK4D3zyEOfMOSenrxPRwvhNibewfxamZYFbj5N/4Dc2NFFmmuDvtIh57mOwa3XWkN/jwmtYNlfzbpKVEkaCy/5zGVOKp/DkGU/udk7P1T7HvuX7Mqm450Jc3fHjt37MhtYN1Dj8BEx7sfW02yiathTyqQ7K7hi/kKKr36O3VZrpc5cwfsXrfOKs41rbmKtwz+LSRWcO+hSVBT4AWtvSCSP7ZHSedxSNgEUcO3nIZ1tsSX94wg7ZG6sbPG4clqp+1xnfcLp/+oHb6UAUjaHQP4HD2p1sac19h7+IHsEvTbwF/ctSdQonhhBIKVMWeGGykUPnO57pR8OhV2YN+dwOPKa1X2usnZ5IFiarba9l3r3zurj4ksT0GN965Vuc8tgpe1x+oMRjvQc7zSg+U1DgdcHCc6F0z34IRgVlU6k2DEKOdEG15zYOTa/M/PorHWG0tlqx1D9vaOKBHVbkg880cRcPTef53WL7Tr/eallqpbbvNNkoYIxu8Lo5j+lV2f0upbC+EsO6ADsAYgd8mSkyiMuR+0W+sBYhYEoChf0TcCGsz8SQBiHNssALpcnJ8Z/THOq9HKnP7cRtWj/MwVjPYWudK0ue98x5vLn9zS77bWnfknpc21bb6+uDZd3Xh+tZ3Zy+O/CYDop8e/HNfPnUVFXPi4NtLI7GaNY29HJQblACPgBaIpZoHxiL45eS17ds44ktrZQWj4BFnKJxNI47mnPbO/ho89ZUx5ckf4yezaJpY7vUZhHX1rLjsnUcMSM/Y3krDrsQn5Tg0HuMRDGl2a/OPWEtjk9KSgr7FyPvJEPAbRfKdn0ca+SUPtXI8bmduAxLKNviPVvgnX3fwXiQK168MlUpM8mnwXS8/Af1fUsOuvC5Cznu4eOymjuXay4KdlP2YNRTOpkrWtv4YVMLV7a2MT8ep9WoHZIa4Xu9gG9p38K8e+fx+vbX+3xMMBbkzo/vpMluqltmt/UqMSWF0qSisO+xwYOGw0HtcXcCqUR+AI60a3E73WP464XdJIP4Shg3tquw5w2+YgxnOaaQaGb3dV6uf/lWLn7+Yh5es2fx1BEjjtcUlPj79/k6bAtcN3RCdvJXs1HFETMq+fLhU3s93u924jSt126P990CX9peji4T/Oapa1NjW9q3cOfKO1LP392e7W/viY8aP0o9lrpVL2dGuIDykfCdHy7cPkqO+w1f6AghsDrWSww+rF8FWE3Cf/zmj1nfuj7nL92rgAsh7hZCNAghVmWMLRBCvC2EWCGEWC6EWJLzmeWYP678I5957DNdrJM3tr8BwAtbXujzuW5+/2Zu+eAW/t34PgHTzFosbJOF1JSMDPdDSaDrH9VvGpp4btt2vEVVeRVpsidIlxXmF9G6r9z3Vt2HAHy0Y89qOEfMBG4pKA303LhhdziFZaUmDI367e8A8J52AF9YPLFPn4XP7QTpRkjo2E2UQ0un7kS3Na9gWTjCf0Pvpe5KfvHGj9jQvpnjQ2FmxhN83PRRd6fKIvOOZkzrmby1bR237GokFJrLvPG5rV6ZbwQOvpCdF7zDanMy+9rrYY+ttkoc7wzt5JENj3T5Yc0FfbHA7wFO7DT2K+AGKeUC4Hr7+YilNdbK7Stup7a9lq0dW7O2Ndlf9mJX32+Lm2PWj8CWeAtltm+5Q/r5+/Rf8d7CGzls+shwP5T63STsGtxbD/wBLDwfn5SM0w0co6BeRU94hPXDlXmbn4mGJewmiW63d8fqxlWsI4zbdFDWzQ9jX3DY6xL3f/IAt215irG6ziv6QRw5o2/p+H63kwRe/Ca0JXqOc2+012YyWRKLE3IYqfj4SLPlo/1+cyuHR6Nsj63uUhKiM8nv/febWrij5QGKMDg6EqV8/1Pyr4HDIFAzZTahC17CVb4YISWNUUtbUusdOa7wCX0QcCnlq0Dnnw4JydqglADD046ij7xa
92rqcaZVJqVkxZqHAOho/KRP51rXso5VTambEcYYVoKMB43DTzmXz332zFSlsuGm2O/m1/pZAPgPuggWnpfa5i7ovvbGaMBjl2YNJbpW7Xu17lVCptULNGwEez2XYRr8YcUf+OLTXwKg1uNgQln/ygw4bAv8r6st19aUuOD6zx1o1ZbuA36Pg5j0MDUu2BLq2We9qnEVs+LZP06uhCUem1stAyZixFgaiVJhmhwTiSIx+PenXUMOdVPnrKfOYt698/j5278EYLyuM92xk3YZoOl/n+fyL5zWp/nvDRw0rYKqaYspN0yCcav0QihsrZUVan03GPpKf382rwJ+LYTYBtwEfC9nMxoEdkXStTEyrbIVjSt4z7SsjsY+ZO5ppsYXnvpC1q3Q5IRlgd+kf2G3DQiGA5/byaZ9LuT2pe9RVVmRLucKeItGrwXuc1gurPZ4CCkl9WGrOW17op0rXrwCDWsBMNiHz/y+Nffxp5V/Sj0fZ5pWyFw/SLpQNFNjggGnNlTsto9mZ7wuJzE8LIiaNCZqu03U0UyNNeFtLI6lF9DqZCXF0vrs1zTWAhCUOkIW8JOqm5hqFjE3Hufh1f/XZeG3JdaSqn74wtZnAStzFGCbrKJsnxHvPR1yhL+UasOgQ7MEPNxqhXEWttbm/LX6K+CXA1dLKScCVwN39bSjEOIS20++vLFxaDqHd6Y9Y8U+U8CTq/LVuk5TL7ePAHE9jilNTvCN44KgdU6f4eL3Ryznup/9cUT6lO+6cAlfXTbTemL3tzSkoKasYDdH5Tdep2Uht8VD/HP9Pznu4eNY1bSKD3Z9kLVfeyLY67n+u/YfVNvZnQdFY9zc2v8UdkdGWvw5wRa261P2SMB9bicx3ExOWHd9mYZJkm0d24hjMCeR4MeNzdy7YxfHxn+NT1r1SZ7ZaK31tGFi6IWExxzE60fcz/HhCOvCdcy/bz5PZljinev8+E2THYFDADBw4Bwhd5sjCm8xVYZBvb6SpmgTHbbOFBbmPsGvvwJ+PvCo/fifQI8/w1LKP0spF0spF1dV9b30Zn/Z1r6tS/hOW7wtFaeZ6UJJ7jdON2jSe099TdavWNDaxEEx6/H4mHeP/giHlaKx/Nl7AUfGf8ek8kEoFztC8Dita7t71Z9TreRe3fY6y+vfwy3h5S11zI3H6dB6t8B3hHZwcDTKv+p28IddDZTvJv66N5wZfS+rdIM/6KdTvCcWuNtyoVRIK7qmMdJ1/vXBWgDWx/bjf0JhFsbjxPAye2w1p4bCfNzxKh/uXE9UgKl5mVju59CFB3BkOKNq5Qfp6JS2DivT+M6du/h1QxNX7fThPMxKMKoQPYcy7tX4SiiwE+guePYC2mItuKTEW5D7/JD+CvgOYKn9+GhgaKLWe+GVba9w8mMnc9/q+7LG26JNjLGtqIieIeD24/G6TpMexZQmG1s3siPUvUs/YVhfchFu4/BojOe3bicSms+iyXlSfU0IbtdOYTtVvTfhzWP8LuvuYmXzh6mxf3z4DMu3vcK8eIwK02SCptOm9bx0Y5gG2zq20YhBmzaeutOXZ0Ub9QeHI+16STjGEsa/Rz/+xT43S2aMo8o2PD5tauiyT239xwCsTcwDYJM5ltevXUbx4ZdyVrv14/P0+petnY0ACyeXUVLoZ1xgHBc0Wtb0zvA2VjWuA6DN/lsoMU1ODEdYFV6Ke+IBADxmdFPcSQG+Yi4LtjE77mVL+xZeb1tPoWkiArlfd+pLGOHfgbeAWUKIOiHEl4GLgd8IIVYCPwcuyfnM+sGbO6xss0+Dm7LG26LNqcXGdQ1pv2Eialkw4zUdU1jRKmf86wxOeOSEbs+ftNhLTWtxrMYwaK5Zytw8CqGKadYP2cSy0SvgBe60r/8nu4KcFArTJuv4pGMLi2JxQtLHrIRGRDbSnujeirz/k/s5+dGTkUKAVsIBMyZaGxac3e95OR1psQ7F/FQUeBi7hyGn02oqmSit719jpIVQIkRdRzqF++Md63FK
SZs2Fr7+IWOufo0JZQEKZhzGOrkMISUP1t4KgMdZwiHTLFHxzT6Oa0JbeHj7TrzS4KrnbwSgrt1a9Exm8r5vzqCkpJS5sTu5VZ7Vz3dilOMrYZqmc8/OjbiEi42JVktovblP8Ot1NUZK+aUeNi3K8VwGTEy33Bpvba6HI62x52ufZ3t4J/MME59pZkUmtIcs39R4u9TqlvbdN8NN2O6XzG4yoaJhrjq4hzxw8cH895OGfi/E5QOl3jKwP+Zjox00uYt4ptC6e3J17Mf2z1zD7BctIV7bvJYlNV09gJmRRgWecRR6XXB9C4j+h8u5nGkBj+slXHvq7D0Pv2utpcQW05ZokPOfvpT1bR9R7ivnz0t+yFPtr1Jmmtx80QlQPo3kSocQgrNOOIX7P/iIWo81jzn+fVMJW+LEX8K7f2ZWQuP0jjAPFq1kZcNH/Gbd/QB8PO9GtDHl/GXmKZQFPIQIcN0J+/b7vRjV2EJdICWHNpbwamUzLU4nOHIfajmqgjcjtoA3hC1h3hnayTWvXMOuRJAS08QvJYmMLu3rtlvhZEkBf6MuXYCmuxjiuL0YutpMl111Fo6MmO++snBSGd86YVbvO+YxTo+Pp7bt4OZdjRRJyVS7R+lR4Qj1zsOZtXAZ0xIeXBKeWPdst+eoyVhwGhuwhcrhtLoK9XdejnT8eEQvZXxpP8IRD7oUNyAkPL39Hta3WQk4LbEWLnnDqsR4eWsbEyd2k9lZPpUa+070h00teMfMS29zOOCcR2HmSRwSiyIcOr9dflv60KppTDzkTCZVBHA7HdT+4hQuPnLans9/b8Cfdqne0pF2472wJvedokaXgNtdSoTTMr+iRlqEiw0TvymzFjHdIoaQkko74/q9+vdS27or7hO3rfeNZvqLe+Hh6ks80hBuP5N1neMiUf7jPIIpWgEzEgm+GmyjrWgGOJw45l3AUZEIL2x+Et3Uu5zDsMsjVOs6nqr9czIvZ4YPvFX2U8CnHE5i8lJkN78jLUaUK1uCfKkjlIo4yqJ0Mue0WX7wutBiPBMWZG/f5xj44gNMllY47PuNb6U2VVYOfgDCqKF0IualbxD7woO4gGua2vlKg5Oplbl3W44qAY9qlgUuXJbQhjPcJdM1Db/MFnDNiFnuEMP6sq8PputB1LZt73L+eDLU0OGD02+HM+9ln+rcZ1cpBoYzo4+k74v3UFE6lUe31zMtLjFLLct07Mnf45RQmKgjxhvdVOoLa2GcwO+3GhSOyY2bLNMHvlNW7LH/O3We3dz1nWcvVHZ7p1A8niOjMVZu3kph+RLOWjyx6z4OB2MzugP9fXs9L22pYygiyEYTjpq5+MZad7oXdATZp72KyRW5D90dXQJuu1AcrhBb27emUljBSj4oME1iduRJRIvwQWIjHimpLB9HiS4J6+kQsbdqu9ZQjqfE3wcHnA37fXbQrkXRfxwZDZnHFHtxjZsPwEY5nopiW9w9ARabxRTpTn73/q1dElhC4QYmahqvGQf2O/OyM0kfuEtKQkVT+5034Cys4rUt6YXLMzrS33OflDy5z096ONBFvXcqDmDC/KNw9eB/dxeNY0pCY/9YnLmJBJWmSWAUJ34NGiXp2uj/MRYNSrmBUSXg8Qy/9a/f+3VW26rqhINyw2BbfCVvbH+DH7/1YxplB+1OJ6JwDOPsMMNSw8AhJW12FlUmCfsHQThGVsalIhuXLeAR6aW62EfROKunynZZSTCSjnd2lk7lm8EGNrat5S8f/yWrWlyofRuFpsmDxjL2rclN9IDDafnAJ2o6JeMHsA7hLaLUTDdg2D8j63KFOY1dkz/T8xwueYkfznySow45qMd93KXjeHT7Tu7ZmeGzdY2MAm15hdMF87/I+uJDmbG0p1iQgTGqBDymhTkiEqXIMGmMNqYs8H9s38l2Wc2RUUvgL/vPZXzSnFH7ZP6ZTNatP4JKw6DCMGiKdhXweNLH7siNRaYYHFxey8puJ0CxzwUzT0C6/Lw3/nwuyijb
6puxlBNC1l3VrR/eypef+3JqWygRosCUFFRPY1x/fNXdEJLWj8dEXWfGxAFk5YWy47+TSWUAm2UNlYU9GxjVFWX8v/89ksBu6nc7imtw0ylELV/LCw83n7uDmd98hq+ekJt1lM6MKgFP6FGKTZMTwmG2h7YTDlodR2p0g0bnGI4PZ/i/M2pFFy08k3G6ZdEUmJIqw6Ap1tUHnkgmAYnRG0M9GnDb6d0dMmCFyZVNRvygnusuO5/5E0rT+x3zfVz+cVzZbP3QB+NBntr0FI+sf4TWeASP6eCSZbmL2JkZmEqRYfLV1uDASg4f9g12TDwl9TRUmS4W+qhxxMCTtEot3/i3K29n42lPYJ7+x4GdTzFojKpg4LgZwycl43WDYDxIQ8hKAy4wTVb7F3Fg5CPOanLzUKWWVQhIOBzM0Cz/5Da3i6MiUR6Nr2ZFwwoWVC9In9920TidSsBHMuXjplgPjrim951LJ3LJjreJOg3uLC3he69ZddmKpZPppovT9h/Xywn2YF6+Mt7cWscWs3pg5RcqpjPuyw9w/a/2QTqjtB1+BfPeu5rySAUnfOE8Fk4aYGbwvC9glkzmF9OOUrVORjijywI3E/hMySQ77vf95lV4TRM3sNW/L7/V/4cDTEvUk3VNksxzlAJWfO1ldsGiZBW2JHF7kVQ4R28hqNGAt6gCftzGjOO+3Ou+7slWV6JKw8gab3cYePDmtDOR2w5r3SarclI/Z79QgM+3hxk/bV/erPsRT7V8jc8sGD/g8+IJ4NhnmRLvPGB0CbjU8UqTw6MxAgasDG2hwI4uWNni4KwLrqK80x9qkorCSn7S2Mxv69sYYxg4pWR7R3bgfdICF8oCHzW4ln2Pfx3+BEdEAny1NchLGdEdC/Uc164osELxnjQPzYmAN8sidlBBTUWe1OJR5JxRI+CGaaBj4jYFH5Weyv4xa8EoWRXsvKMXMn2fWQR68F8XGO2cEQqzwX0KDqDCMPhwe3ZqfVyP4zElwqUWMUcNngJOO/YoVs3+LpcH26k0TR6t28nPGps5oKj3PpV7Qtk+SzgkdisPGcv63MRhd9xrnMAt+udwOgSfXzSBm84cnIUyxchl1Ah4stCUlG7mzJnPOLtyYLKW8xePmAtC4C7oPilDBC2xPvnsq9gw/9tUGgbNkfqsfRLJxB+XCiMcbcxami7MNKl0NqfO+Bz7npHbPiXlBR52Yln1ubDAm8Yfw7+dxwBw05n78/lFEwZ8TkV+MWoWMR9Y+wAA0vQgyiZTZbtKygy4adLtfMv2ZRaVzgLDCsM6MBqjOmwnKPzPnfDm7ymsmc2MadOo3GWyLZHdgCKsx/BLEzzKAh9tTK8qhPP+BaaOd59jBuU1Mv3pXtfAm388evmhDLDCrSLPGTUCnuwuPy7uwVk2gQpbwOMyQKhyQWo/T9l4blvVwDOFBdzY2Mw/9DnWhpnHW/8ACqup1nWiMoiUMvWH16ZHKDZNHCqpYXQybWnv+4wgRkrvVcXwMWoE3OP0MFtzMjYWwFM6PuUbikt3VhfxouopLI3GWBq1IkoeNJbRpapxQTWzEhp6cYwdoR2ML7JW9tuNGCWGicOtXCiK/vHHsxeyublrs2WFoj+MGgGPG3G80iAkCnCV1DA9YYUSVkeKqShMC3igIu0nPCl+I5/IyV1PVjqReXbj1nd3ruSMDAEfZ0rc7lHztimGmJPm5b4vomLvZdQsYiaMBH7TICIC4PIy1/Tx7207GNcxnv3GZdSyKE7HyX4iJ3WfduwvYx/NpNA0eW1lul9zu5mgwBR4BqEojUKhUOwpo0aJYkYMn6mTcFnlXU2nj0m6TossyS5GVDoJbfwSLklcDQhevKZ7v6dX6pwUCvNKeH2qXnS7mcBvOPC4Rs3bplAo8phR4wtI6HH8po7usmp7R0pm4I/tIiG82WU7XR7cF7/Agpc38tXplbsN55qV0EgIaIm2UuYvJYoJpm9QykIqFArFnjJqlCimR/FKieGxBHz1kl/y
grGI5poju93/q0ftw4KJpT2er+Pc5yizI1m2tTVw7+p7ATCMAopzEMOrUCgUAyWvLXDN1Hhz+5vMLJtJQreSbBI+K1Fi3uyZXDj2//Grz8/v17mLph+MhpX6vK19F39cYVVk88XKmFalaqEoFIrhJ68F/OH1D/Pzd35OgbuAsBbGKyWtxVYD2rICD49fcdiAzj+hoBQI8ubON0mYCW5saGJ19FCmV6o2agqFYvjJaxfKrrCVKZnsvOMwHcRKcle/otxjLX6ua7V6Zc5MaGxmHONz1GJLoVAoBkKvAi6EuFsI0SCEWNVp/EohxFohxGohxK8Gb4o989/1W3AYHkoNq2BV1CygOJC7LMliXxkOKakLfwpAh1ZN1eLPqTKbCoViRNAXC/we4MTMASHEMuB0YH8p5X7ATbmfWu9sDTZTZupMtut/S+mm2Je7BUZPoIRyQ5Iwo7ikZJMxlSuPmZGz8ysUCsVA6FXApZSvAi2dhi8HfiGljNv7NHQ5cChwxAiYUJxs8CpdOanylsRdUMoEzYoBrzAMdskKqopUGr1CoRgZ9NcHPhM4QgjxjhDiFSHEgT3tKIS4RAixXAixvLGxsafd+ocjRoEpKbIF3JTunIb4uQqrmKxbKfVVhoG/YmJOO7QoFArFQOivgLuAcuBg4NvAP0QPyial/LOUcrGUcnFVVVU/X64HRJRC00wJeFjk1gJn0QUUGlagTplhcsGJh+bu3AqFQjFA+ivgdcCj0uJdwAQqczetviEdMYpNM+VC6XCI3Aq4v5TJcgIB0+RrrUGcVbnrUK5QKBQDpb8C/jiwDEAIMRPwAE05mlOfkSJKiakzza486DR8FPtzG9o+uXAhb2+pY05Cg4ruu/koFArFcNCXMMK/A28Bs4QQdUKILwN3A9Ps0MIHgfOllEPaHESaJogIVWaMU8IRbqtvYFpbTW4tcKBo/88ggJ0TTgTl/1YoFCOIXs1VKeWXeth0To7nskckEh1IIfBLiQCWRmO8Lv343QNvVZXJvAOPQs7ZRI3L0/vOCoVCMYTkbSp9ImFlX7ozDP8IvkGJEhEFFTk/p0KhUAyUvE2lT2gRADyZAi5VjLZCodh7yFsB17QQYAn4dmlZyOOKlZtDoVDsPeStgKcscCQ/0c4D4DPHdF/7W6FQKEYj+esD16MAPKMdwnPmgbx85scctd+kYZ6VQqFQDB15YYE/+emT3PjOjVljSQu80XafeLyBIZ+XQqFQDCd5IeCPrn6XB9c+TGaoeUKzLHBDWnHfXndeXIpCoVDkjLxQPS1egEmCiB5Jj+kxwCpgBeBx5jb+W6FQKEY6eSHgVQGrCNbWtvrUmGb7wA3TCh0cV5q7Rg4KhUKRD+SFgNcUVgPwrVeuTrlREoYl4AsmVbP5xpOpKFQx4AqFYu8iLwR8QrEl4FtDm2mNtwKQ0CwXiss5ONmXCoVCMdLJCwGfWlqTetwaswXciAPgdKroE4VCsXeSFwI+q+V9ZiSszjgt4V0AaLaAu1yqQ7xCodg7yQsBL2n+kF80NAPQEtoJpC1wl0tZ4AqFYu8kLwTcOeNYyg0DSFvgcd0ScLeywBUKxV5KXgg4Ew+m1G6btrPdEvCEYblU3G5lgSsUir2T/BBwl4eVh/6RUsOgPmR1bkta4F5P8XDOTKFQKIaN/BBwoLikhDLDpDHaAkDcSOCUEq9PuVAUCsXeSd4IeFlJKeWGQTDRBlgC7pESt1vVAFcoFHsneSPgpSUllJsmbbrVyCGsx/FIidedtxVxFQqFYkDkjYC7fIWUGwZhaRW0ajIjVOoSrytvLkGhUChySv6on7uAcsMkQhzDNGg0olQYUpWRVSgUey29qp8Q4m4hRIMQYlU3264RQkghROXgTC8DT4AywwABrfFWmswYxboTr0uVkVUoFHsnfTFf7wFO7DwohJgIHA9szfGcusflZx9NA2DZP5bRInSKdKdyoSgUir2WXtVPSvkq0NLNpt8C3wFkN9tyj8PB4lic0ztC
qaECzacscIVCsdfSL/NVCHE6sF1KubIP+14ihFguhFje2NjYn5dLnwv4aVML07acxNVtglmhYuUDVygUey17rH5CiADwfeD6vuwvpfyzlHKxlHJxVVXVnr5cFvpFLwDwhHkHF7VsIWoWKheKQqHYa+mP+k0HpgIrhRC1wATgAyHE2FxOrDtck5agFU9KPe8ggM+tXCgKhWLvZI8FXEr5sZSyWko5RUo5BagDFkop63s5NCe4JyxMPfYTVxa4QqHYa+lLGOHfgbeAWUKIOiHElwd/WruheFzqYbUIqkVMhUKx19KXKJQvSSlrpJRuKeUEKeVdnbZPkVI2Dd4UO7HPMamHjxpH4FEWuEKh2EvJv0Ii+xwL16xjys/eB8DpUA2NFQrF3kl+mq9Fg75eqlAoFCOe/LPAbR6/4jBWbgsO9zQUCoVi2MhbAV8wsZQFE0uHexoKhUIxbOSnC0WhUCgUSsAVCoUiX1ECrlAoFHmKEnCFQqHIU5SAKxQKRZ6iBFyhUCjyFCXgCoVCkacoAVcoFIo8RUg5NB3RAIQQjcCWQX6ZSmDoimsNLaP52kBdX74zmq9vuK9tspSyS0ecIRXwoUAIsVxKuXi45zEYjOZrA3V9+c5ovr6Rem3KhaJQKBR5ihJwhUKhyFNGo4D/ebgnMIiM5msDdX35zmi+vhF5baPOB65QKBR7C6PRAlcoFIq9AiXgCoVCkafklYALISYKIV4SQqwRQqwWQnzDHi8XQrwghNhg/19mjwshxO+FEBuFEB8JIRYO7xXsHiGETwjxrhBipX19N9jjU4UQ79jX8ZAQwmOPe+3nG+3tU4b1AvqAEMIphPhQCPGU/Xw0XVutEOJjIcQKIcRye2xUfDcBhBClQoiHhRBrhRCfCCEOGS3XJ4SYZX9uyX/tQoirRvr15ZWAAzpwjZRyDnAwcIUQYg7wXeBFKeUM4EX7OcBJwAz73yXAH4d+yntEHDhaSrk/sAA4UQhxMPBL4LdSyn2AVuDL9v5fBlrt8d/a+410vgF8kvF8NF0bwDIp5YKMmOHR8t0EuAV4Vko5G9gf63McFdcnpVxnf24LgEVABHiMkX59Usq8/Qc8ARwHrANq7LEaYJ39+A7gSxn7p/Yb6f+AAPABcBBWBpjLHj8EeM5+/BxwiP3YZe8nhnvuu7mmCVh/BEcDTwFitFybPc9aoLLT2Kj4bgIlwObOn8Foub5O13Q88EY+XF++WeAp7FvqA4B3gDFSyp32pnpgjP14PLAt47A6e2zEYrsYVgANwAvAp0BQSqnbu2ReQ+r67O1tQMWQTnjP+B3wHcC0n1cweq4NQALPCyHeF0JcYo+Nlu/mVKAR+KvtArtTCFHA6Lm+TL4I/N1+PKKvLy8FXAhRCDwCXCWlbM/cJq2fw7yNjZRSGtK6jZsALAFmD++McoMQ4lSgQUr5/nDPZRA5XEq5EOv2+gohxJGZG/P8u+kCFgJ/lFIeAIRJuxOAvL8+AOw1mNOAf3beNhKvL+8EXAjhxhLvv0kpH7WHdwkhauztNVjWK8B2YGLG4RPssRGPlDIIvITlVigVQrjsTZnXkLo+e3sJ0Dy0M+0zhwGnCSFqgQex3Ci3MDquDQAp5Xb7/wYs/+kSRs93sw6ok1K+Yz9/GEvQR8v1JTkJ+EBKuct+PqKvL68EXAghgLuAT6SUN2ds+hdwvv34fCzfeHL8PHvF+GCgLeN2aMQhhKgSQpTaj/1Y/v1PsIT88/Zuna8ved2fB/5rWwkjDinl96SUE6SUU7BuUf8rpTybUXBtAEKIAiFEUfIxlh91FaPkuymlrAe2CSFm2UPHAGsYJdeXwZdIu09gpF/fcC8Y7OHiwuFYtzAfASvsfydj+UZfBDYA/wHK7f0F8AcsP/LHwOLhvoZerm8+8KF9fauA6+3xacC7wEasWzuvPe6zn2+0t08b7mvo43UeBTw1mq7Nvo6V9r/VwHX2+Kj4btpzXgAst7+fjwNlo+z6CrDu8koyxkb09alU
eoVCochT8sqFolAoFIo0SsAVCoUiT1ECrlAoFHmKEnCFQqHIU5SAKxQKRZ6iBFyhUCjyFCXgCoVCkaf8f4CtohqihPFdAAAAAElFTkSuQmCC\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Draw the open, close and high columns of `ht` as lines on one figure.\n",
    "# NOTE(review): `ht` is created in an earlier cell -- presumably a DataFrame of quotes; confirm.\n",
    "price_columns = ['open', 'close', 'high']\n",
    "ht[price_columns].plot(kind='line')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'NoneType' object is not subscriptable\n",
      "'NoneType' object is not subscriptable\n",
      "'NoneType' object is not subscriptable\n"
     ]
    },
    {
     "ename": "OSError",
     "evalue": "获取失败，请检查网络.",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mOSError\u001B[0m                                   Traceback (most recent call last)",
      "\u001B[1;32m<ipython-input-52-e8cc851b9d76>\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mdf\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mts\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mget_today_ticks\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;34m'300274'\u001B[0m\u001B[1;33m)\u001B[0m            \u001B[1;31m#当日的历史分笔数据\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      2\u001B[0m \u001B[0mdf\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mhead\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;36m10\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      3\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;32md:\\programdata\\conda3\\envs\\mllesson\\lib\\site-packages\\tushare\\stock\\trading.py\u001B[0m in \u001B[0;36mget_today_ticks\u001B[1;34m(code, retry_count, pause)\u001B[0m\n\u001B[0;32m   1291\u001B[0m         \u001B[1;32melse\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m   1292\u001B[0m             \u001B[1;32mreturn\u001B[0m \u001B[0mdf\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m-> 1293\u001B[1;33m     \u001B[1;32mraise\u001B[0m \u001B[0mIOError\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mct\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mNETWORK_URL_ERROR_MSG\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m   1294\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m   1295\u001B[0m bs_type = {'1':u'买入', \n",
      "\u001B[1;31mOSError\u001B[0m: 获取失败，请检查网络."
     ]
    }
   ],
   "source": [
    "# Tick-by-tick trade data for the current day, stock code 300274.\n",
    "df = ts.get_today_ticks(code='300274')\n",
    "# Preview the first ten rows.\n",
    "df.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "'NoneType' object is not subscriptable",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mTypeError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[1;32m<ipython-input-53-31aaf3b10937>\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m      1\u001B[0m \u001B[0mhtbig\u001B[0m\u001B[1;33m=\u001B[0m \u001B[0mts\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mget_sina_dd\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;34m'300274'\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mdate\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;34m'2021-03-08'\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mvol\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;36m400\u001B[0m\u001B[1;33m)\u001B[0m  \u001B[1;31m#指定大于等于500手的数据\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      2\u001B[0m \u001B[0mprint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mhtbig\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 3\u001B[1;33m \u001B[0mhtbig\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;34m'type'\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mvalue_counts\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      4\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;31mTypeError\u001B[0m: 'NoneType' object is not subscriptable"
     ]
    }
   ],
   "source": [
    "# Trades for stock 300274 on 2021-03-08, filtered by the vol argument.\n",
    "# The original note said '>= 500 lots' while the call passes vol=400; this comment follows the code.\n",
    "# NOTE(review): vol is presumably the minimum trade size in lots -- confirm the intended threshold.\n",
    "htbig = ts.get_sina_dd('300274', date='2021-03-08', vol=400)\n",
    "print(htbig)\n",
    "# get_sina_dd returned None in the recorded run, which made htbig['type'] raise TypeError;\n",
    "# only index the result when data actually came back (None displays nothing).\n",
    "htbig['type'].value_counts() if htbig is not None else None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[4, 3, 2, 1]\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Turn a Series into a plain list and reverse it in a single expression.\n",
    "data = list(pd.Series([1, 2, 3, 4]))[::-1]\n",
    "print(data)\n",
    "\n",
    "# Scratch notes kept from the original cell:\n",
    "# pd.DataFrame\n",
    "# pd.Series\n",
    "# np.array()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}